Spaces:

eddmpython
/

dartlab

Sleeping

App Files Files Community

github-actions[bot] committed 4 days ago

Commit

6254e2b

1 Parent(s): bc7389c

sync from 4c3ef19

Browse files

Files changed (35) hide show

Dockerfile +22 -2
README_PROJECT.md +0 -1108
pyproject.toml +1 -0
src/dartlab/ai/DEV.md +72 -0
src/dartlab/ai/context/builder.py +66 -4
src/dartlab/ai/context/pruning.py +95 -0
src/dartlab/ai/conversation/prompts.py +34 -7
src/dartlab/ai/conversation/templates/analysisPhilosophy.py +57 -0
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_124945.jsonl +35 -0
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_131602.jsonl +4 -0
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_132810.jsonl +11 -0
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_124945.md +21 -0
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_131602.md +15 -0
src/dartlab/ai/memory/__init__.py +8 -0
src/dartlab/ai/memory/store.py +154 -0
src/dartlab/ai/memory/summarizer.py +55 -0
src/dartlab/ai/providers/oauth_codex.py +57 -11
src/dartlab/ai/runtime/agent.py +124 -7
src/dartlab/ai/runtime/core.py +40 -2
src/dartlab/ai/runtime/run_modes.py +87 -2
src/dartlab/ai/runtime/scratchpad.py +115 -0
src/dartlab/ai/skills/__init__.py +9 -0
src/dartlab/ai/skills/catalog.py +145 -0
src/dartlab/ai/skills/registry.py +58 -0
src/dartlab/ai/tools/defaults/helpers.py +5 -2
src/dartlab/cli/commands/chat.py +472 -0
src/dartlab/cli/commands/collect.py +51 -0
src/dartlab/cli/parser.py +1 -0
src/dartlab/core/dataConfig.py +4 -0
src/dartlab/core/dataLoader.py +5 -2
src/dartlab/market/_helpers.py +78 -5
src/dartlab/market/scan/__init__.py +5 -0
src/dartlab/market/scan/builder.py +436 -0
src/dartlab/providers/dart/_sections_source.py +151 -1
src/dartlab/review/builders.py +9 -12

Dockerfile CHANGED Viewed

@@ -4,14 +4,34 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
-COPY pyproject.toml README.md ./
 COPY src/ src/
-RUN pip install --no-cache-dir -e ".[ai]"
 ENV SPACE_ID=1
 EXPOSE 7860

 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential \
+    libxml2-dev \
+    libxslt1-dev \
     && rm -rf /var/lib/apt/lists/*
+# 핵심 의존성만 먼저 설치 (wheel 우선, 빌드 실패 방지)
+RUN pip install --no-cache-dir \
+    polars \
+    beautifulsoup4 lxml \
+    httpx requests orjson \
+    openpyxl rich plotly \
+    prompt-toolkit \
+    alive-progress \
+    diff-match-patch \
+    fastapi uvicorn[standard] sse-starlette msgpack
+COPY pyproject.toml ./
 COPY src/ src/
+RUN touch README.md
+# --no-deps: 위에서 이미 설치한 의존성 재설치 방지, marimo/mcp 건너뜀
+RUN pip install --no-cache-dir --no-deps -e .
+# HF Spaces user
+RUN useradd -m -u 1000 user
+USER user
 ENV SPACE_ID=1
+ENV HOME=/home/user
 EXPOSE 7860

README_PROJECT.md DELETED Viewed

@@ -1,1108 +0,0 @@
-<div align="center">
-<br>
-<img alt="DartLab" src=".github/assets/logo.png" width="180">
-<h3>DartLab</h3>
-<p><b>One stock code. The whole story.</b></p>
-<p>DART + EDGAR filings, structured and comparable — in one line of Python.</p>
-<p>
-<a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/v/dartlab?style=for-the-badge&color=ea4647&labelColor=050811&logo=pypi&logoColor=white" alt="PyPI"></a>
-<a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/pyversions/dartlab?style=for-the-badge&color=c83232&labelColor=050811&logo=python&logoColor=white" alt="Python"></a>
-<a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-94a3b8?style=for-the-badge&labelColor=050811" alt="License"></a>
-<a href="https://github.com/eddmpython/dartlab/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/eddmpython/dartlab/ci.yml?branch=master&style=for-the-badge&labelColor=050811&logo=github&logoColor=white&label=CI" alt="CI"></a>
-<a href="https://eddmpython.github.io/dartlab/"><img src="https://img.shields.io/badge/Docs-GitHub_Pages-38bdf8?style=for-the-badge&labelColor=050811&logo=github-pages&logoColor=white" alt="Docs"></a>
-<a href="https://eddmpython.github.io/dartlab/blog/"><img src="https://img.shields.io/badge/Blog-120%2B_Articles-fbbf24?style=for-the-badge&labelColor=050811&logo=rss&logoColor=white" alt="Blog"></a>
-</p>
-<p>
-<a href="https://eddmpython.github.io/dartlab/">Docs</a> · <a href="https://eddmpython.github.io/dartlab/blog/">Blog</a> · <a href="https://huggingface.co/spaces/eddmpython/dartlab">Live Demo</a> · <a href="notebooks/marimo/">Marimo Notebooks</a> · <a href="https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb">Open in Colab</a> · <a href="README_KR.md">한국어</a> · <a href="https://buymeacoffee.com/eddmpython">Sponsor</a>
-</p>
-<p>
-<a href="https://huggingface.co/datasets/eddmpython/dartlab-data"><img src="https://img.shields.io/badge/Data-HuggingFace-ffd21e?style=for-the-badge&labelColor=050811&logo=huggingface&logoColor=white" alt="HuggingFace Data"></a>
-</p>
-</div>
-> **Note:** DartLab is under active development. APIs may change between versions, and documentation may lag behind the latest code.
-## Install
-Requires **Python 3.12+**.
-```bash
-# Core — financial statements, sections, Company
-uv add dartlab
-# or with pip
-pip install dartlab
-```
-### Optional Extras
-Install only what you need:
-```bash
-uv add "dartlab[ai]"              # web UI, server, streaming (FastAPI + uvicorn)
-uv add "dartlab[llm]"             # LLM analysis (OpenAI)
-uv add "dartlab[charts]"          # Plotly charts, network graphs (plotly + networkx + scipy)
-uv add "dartlab[mcp]"             # MCP server for Claude Desktop / Code / Cursor
-uv add "dartlab[channel]"         # web UI + cloudflared tunnel sharing
-uv add "dartlab[channel-ngrok]"   # web UI + ngrok tunnel sharing
-uv add "dartlab[channel-full]"    # all channels + Telegram / Slack / Discord bots
-uv add "dartlab[all]"             # everything above (except channel bots)
-```
-**Common combinations:**
-```bash
-# financial analysis + AI chat
-uv add "dartlab[ai,llm]"
-# full analysis suite — charts, AI, LLM
-uv add "dartlab[ai,llm,charts]"
-# share analysis with team via tunnel
-uv add "dartlab[channel]"
-```
-### From Source
-```bash
-git clone https://github.com/eddmpython/dartlab.git
-cd dartlab && uv pip install -e ".[all]"
-# or with pip
-pip install -e ".[all]"
-```
-PyPI releases are published only when the core is stable. If you want the latest features (including experimental ones like audit, forecast, valuation), clone the repo directly — but expect occasional breaking changes.
-### Desktop App (Alpha)
-Skip all installation steps — download the standalone Windows launcher:
-- **[Download DartLab.exe](https://github.com/eddmpython/dartlab-desktop/releases/latest/download/DartLab.exe)** from [dartlab-desktop](https://github.com/eddmpython/dartlab-desktop)
-- Also available from the [DartLab landing page](https://eddmpython.github.io/dartlab/)
-One-click launch — no Python, no terminal, no package manager required. The desktop app bundles the web UI with a built-in Python runtime.
-> **Alpha** — functional but incomplete. The desktop app is a Windows-only `.exe` launcher. macOS/Linux are not yet supported.
----
-**No data setup required.** When you create a `Company`, dartlab automatically downloads the required data from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data) (DART) or SEC API (EDGAR). The second run loads instantly from local cache.
-## Quick Start
-Pick any company. Get the whole picture.
-```python
-import dartlab
-# Samsung Electronics — from raw filings to structured data
-c = dartlab.Company("005930")
-c.sections                      # every topic, every period, side by side
-c.show("businessOverview")      # what this company actually does
-c.diff("businessOverview")      # what changed since last year
-c.BS                            # standardized balance sheet
-c.ratios                        # 47 financial ratios, already calculated
-# Apple — same interface, different country
-us = dartlab.Company("AAPL")
-us.show("business")
-us.ratios
-# No code needed — ask in natural language
-dartlab.ask("Analyze Samsung Electronics financial health")
-```
-## What DartLab Is
-A public company files hundreds of pages every quarter. Inside those pages is everything — revenue trends, risk warnings, management strategy, competitive position. The complete truth about a company, written by the company itself.
-Nobody reads it.
-Not because they don't want to. Because the same information is named differently by every company, structured differently every year, and scattered across formats designed for regulators, not readers. The same "revenue" appears as `ifrs-full_Revenue`, `dart_Revenue`, `SalesRevenue`, or dozens of Korean variations.
-DartLab changes who can access this information. Two engines turn raw filings into one comparable map:
-### The Two Problems DartLab Solves
-**1. The same company says different things differently every year.**
-Sections horizontalization normalizes every disclosure section into a **topic × period** grid. Different titles across years and industries all resolve to the same canonical topic:
-```
-                    2025Q4    2024Q4    2024Q3    2023Q4    …
-companyOverview       ✓         ✓         ✓         ✓
-businessOverview      ✓         ✓         ✓         ✓
-productService        ✓         ✓         ✓         ✓
-salesOrder            ✓         ✓         —         ✓
-employee              ✓         ✓         ✓         ✓
-dividend              ✓         ✓         ✓         ✓
-audit                 ✓         ✓         ✓         ✓
-…                    (98 canonical topics)
-```
-```
-Before (raw section titles):              After (canonical topic):
-Samsung    "II. 사업의 내용"               → businessOverview
-Hyundai    "II. 사업의 내용 [자동차부문]"   → businessOverview
-Kakao      "2. 사업의 내용"               → businessOverview
-```
-The mapping pipeline: **text normalization** → **545 hardcoded title mappings** → **73 regex patterns** → canonical topic. The mapping rate is 95%+ across all listed companies. Each cell keeps the full text with heading/body separation, tables, and original evidence. Comparing "what did the company say about risk last year vs. this year" becomes a single `diff()` call.
-**2. Every company names the same number differently.**
-Account standardization normalizes every XBRL account through a 4-step pipeline:
-```
-Raw XBRL account_id
-  → Strip prefixes (ifrs-full_, dart_, ifrs_, ifrs-smes_)
-  → English ID synonyms (59 rules)
-  → Korean name synonyms (104 rules)
-  → Learned mapping table (34,249 entries)
-  → Result: revenue, operatingIncome, totalAssets, …
-```
-```
-Before (raw XBRL):                          After (standardized):
-Company     account_id          account_nm   →  snakeId    label
-Samsung     ifrs-full_Revenue   수익(매출액)  →  revenue    매출액
-SK Hynix    dart_Revenue        매출액       →  revenue    매출액
-LG Energy   Revenue             매출         →  revenue    매출액
-```
-~97% mapping rate. Cross-company comparison requires zero manual work. Combined with `scanAccount` / `scanRatio`, you can compare a single metric across **2,700+ companies** in one call.
-### Principles — Accessibility and Reliability
-These two principles govern every public API:
-**Accessibility** — One stock code is all you need. `import dartlab` provides access to every feature. No internal DTOs, no extra imports, no data setup. `Company("005930")` auto-downloads from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data).
-**Reliability** — Numbers are raw originals from DART/EDGAR. Missing data returns `None`, never a guess. `trace(topic)` shows which source was chosen and why. Errors are never swallowed.
-### Company — The Merged Map
-`Company` uses `sections` as the spine, then overlays stronger data sources:
-```
-Layer         What it provides                   Priority
-─────────────────────────────────────────────────────────
-docs          Section text, tables, evidence      Base spine
-finance       BS, IS, CF, ratios, time series     Replaces numeric topics
-report        28 structured APIs (DART only)      Fills structured topics
-─────────────────────────────────────────────────────────
-profile       Merged view (default for users)     Highest
-```
-```python
-c.docs.sections     # pure text source (sections spine)
-c.finance.BS        # authoritative financial statements
-c.report.extract()  # structured DART API data
-c.profile.sections  # merged view — what users see by default
-```
-`c.sections` is the merged view. `c.trace("BS")` tells you which source was chosen and why.
-### Architecture — Layered by Responsibility
-DartLab follows a strict layered architecture where each layer only depends on layers below it:
-```
-L0  core/        Protocols, finance utils, docs utils, registry
-L1  providers/   Country-specific data (DART, EDGAR, EDINET)
-    gather/      External market data (Naver, Yahoo, FRED)
-    market/      Market-wide scanning (2,700+ companies)
-L2  analysis/    Analytical engines (valuation, risk, insights, event study)
-L3  ai/          LLM-powered analysis (9 providers)
-```
-Import direction is enforced by CI — no reverse dependencies allowed.
-### Extensibility — Zero Core Modification
-Adding a new country requires zero changes to core code:
-1. Create a provider package under `providers/`
-2. Implement `canHandle(code) -> bool` and `priority() -> int`
-3. Register via `entry_points` in `pyproject.toml`
-```python
-dartlab.Company("005930")  # → DART provider (priority 10)
-dartlab.Company("AAPL")    # → EDGAR provider (priority 20)
-```
-The facade iterates providers by priority — first match wins. This follows the same pattern as OpenBB's provider system and scikit-learn's estimator registration.
-## Core Features
-### Show, Trace, Diff
-```python
-c = dartlab.Company("005930")
-# show — open any topic with source-aware priority
-c.show("BS")                # → finance DataFrame
-c.show("overview")          # → sections-based text + tables
-c.show("dividend")          # → report DataFrame (all quarters)
-c.show("IS", period=["2024Q4", "2023Q4"])  # compare specific periods
-# trace — why a topic came from docs, finance, or report
-c.trace("BS")               # → {"primarySource": "finance", ...}
-# diff — text change detection (3 modes)
-c.diff()                                    # full summary
-c.diff("businessOverview")                  # topic history
-c.diff("businessOverview", "2024", "2025")  # line-by-line diff
-```
-What the output looks like:
-```
->>> c.show("businessOverview")
-shape: (12, 5)
-┌───────────┬──────────┬──────────────────────────────┬──────────────────────────────┐
-│ blockType │ nodeType │ 2024                         │ 2023                         │
-├───────────┼──────────┼──────────────────────────────┼──────────────────────────────┤
-│ text      │ heading  │ 1. 산업의 특성                │ 1. 산업의 특성                │
-│ text      │ body     │ 반도체 산업은 기술 집약적 …   │ 반도체 산업은 기술 집약적 …    │
-│ table     │ null     │ DataFrame(5×3)               │ DataFrame(5×3)               │
-└───────────┴──────────┴──────────────────────────────┴──────────────────────────────┘
->>> c.diff("businessOverview", "2023", "2024")
-┌──────────┬─────────────────────────────────────────────┐
-│ status   │ text                                        │
-├──────────┼─────────────────────────────────────────────┤
-│ added    │ AI 반도체 수요 급증에 따른 HBM 매출 확대 …   │
-│ modified │ 매출액 258.9조원 → 300.9조원                 │
-│ removed  │ 반도체 부문 수익성 악화 우려 …               │
-└──────────┴─────────────────────────────────────────────┘
-```
-### Finance
-```python
-c.BS                    # balance sheet (account × period, newest first)
-c.IS                    # income statement
-c.CF                    # cash flow
-c.ratios                # ratio time series DataFrame (6 categories × period)
-c.finance.ratioSeries   # ratio time series across years
-c.finance.timeseries    # raw account time series
-c.annual                # annual time series
-c.filings()             # disclosure document list (Tier 1 Stable)
-```
-All accounts are normalized through the 4-step standardization pipeline — Samsung's `revenue` and LG's `revenue` are the same `snakeId`. Ratios cover 6 categories: profitability, stability, growth, efficiency, cashflow, and valuation.
-### Market-wide Financial Screening
-Scan a single account or ratio across **all listed companies** in one call — 2,700+ DART firms or 500+ EDGAR firms. Returns a wide Polars DataFrame (rows = companies, columns = periods, newest first).
-```python
-import dartlab
-# scan a single account across all listed companies
-dartlab.scanAccount("매출액")                         # revenue, quarterly standalone
-dartlab.scanAccount("operating_profit", annual=True)  # annual basis
-dartlab.scanAccount("total_assets", market="edgar")   # US EDGAR
-# scan a ratio across all listed companies
-dartlab.scanRatio("roe")                              # quarterly ROE for all firms
-dartlab.scanRatio("debtRatio", annual=True)           # annual debt-to-equity
-# list available ratios (13 ratios: profitability, stability, growth, efficiency, cashflow)
-dartlab.scanRatioList()
-```
-Accepts both Korean names (`매출액`) and English snakeIds (`sales`) — same 4-step normalization as Company finance. Reads 2,700+ parquet files in parallel via ThreadPool, typically completes in ~3 seconds.
-> **Requires pre-downloaded data.** Market-wide functions (`scanAccount`, `screen`, `digest`, etc.) operate on local data — individual `Company()` calls only download one firm at a time. Download all data first:
-> ```python
-> # run in a shell first: pip install "dartlab[hf]"
-> dartlab.downloadAll("finance")   # ~600 MB, 2,700+ firms
-> dartlab.downloadAll("report")    # ~320 MB (governance/workforce/capital/debt)
-> dartlab.downloadAll("docs")      # ~8 GB (digest/signal — large)
-> ```
-## Review — Structured Company Analysis
-> **Experimental** — the review system is under active development. Templates, blocks, and output formats may change between versions.
-DartLab's review system assembles financial data into structured, readable reports.
-### Templates
-Pre-built block combinations that cover key analysis areas:
-```python
-c = dartlab.Company("005930")
-c.review("수익구조")    # revenue structure — segments, growth, concentration
-c.review("자금조달")    # capital structure — debt, liquidity, interest burden
-c.review()             # all templates
-```
-### Block Assembly
-Every review is built from reusable blocks. Get the full block dictionary and assemble your own:
-```python
-from dartlab.review import blocks, Review
-b = blocks(c)          # dict of 16 pre-built blocks
-list(b.keys())         # → ["profile", "segmentComposition", "growth", ...]
-# pick what you need
-Review([
-    b["segmentComposition"],
-    b["growth"],
-    c.select("IS", ["매출액"]),   # mix with raw data
-])
-```
-### Reviewer — AI Layer
-Add LLM-powered opinions on top of data blocks. Works with any provider:
-```python
-c.reviewer()                                    # all sections + AI opinion
-c.reviewer("수익구조")                           # single section + AI
-c.reviewer(guide="Evaluate from semiconductor cycle perspective")  # custom guide
-```
-**Free AI providers** — no paid API key required:
-| Provider | Setup |
-|----------|-------|
-| Gemini | `dartlab setup gemini` |
-| Groq | `dartlab setup groq` |
-| Cerebras | `dartlab setup cerebras` |
-| Mistral | `dartlab setup mistral` |
-Or use any OpenAI-compatible endpoint:
-```bash
-dartlab setup custom --base-url http://localhost:11434/v1   # Ollama local
-```
-### Customization
-- **Templates**: Pre-defined block combinations (`수익구조`, `자금조달`)
-- **Free assembly**: Mix any blocks + raw DataFrames in `Review([...])`
-- **Guide**: Pass `guide="..."` to `c.reviewer()` for domain-specific AI analysis
-- **Layout**: `ReviewLayout(indentH1=2, gapAfterH1=1, ...)` for rendering control
-- **Render formats**: `review.render("rich" | "html" | "markdown" | "json")`
-See [notebooks/marimo/sampleReview.py](notebooks/marimo/sampleReview.py) for interactive examples.
-## Additional Features
-> Features below are **beta** or **experimental** — APIs may change. See [stability](docs/stability.md).
-### Insights (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-c.insights                      # 10-area analysis
-c.insights.grades()             # → {"performance": "A", "profitability": "B", …}
-c.insights.performance.grade    # → "A"
-c.insights.performance.details  # → ["Revenue growth +8.3%", …]
-c.insights.anomalies            # → outliers and red flags
-# distress scorecard — 6-model bankruptcy/fraud prediction
-c.insights.distress             # Altman Z-Score, Beneish M-Score, Ohlson O-Score,
-                                # Merton Distance-to-Default, Piotroski F-Score, Sloan Ratio
-```
-### Valuation, Forecast & Simulation
-```python
-dartlab.valuation("005930")           # DCF + DDM + relative valuation
-dartlab.forecast("005930")            # revenue forecast (4-source ensemble)
-dartlab.simulation("005930")          # scenario simulation (macro presets)
-# also available as Company methods
-c.valuation()
-c.forecast(horizon=3)
-c.simulation(scenarios=["adverse", "rate_hike"])
-```
-Auto-detects currency — KRW for DART companies, USD for EDGAR. Works with both `dartlab.valuation("AAPL")` and `dartlab.valuation("005930")`.
-### Audit (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-dartlab.audit("005930")               # 11 red flag detectors
-# Benford's Law (digit distribution), auditor change (PCAOB AS 3101),
-# going concern (ISA 570), internal control (SOX 302/404),
-# revenue quality (Dechow & Dichev), Merton default probability, ...
-```
-### Market Intelligence (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-dartlab.digest()                      # market-wide disclosure change digest
-dartlab.digest(sector="반도체")        # sector filter
-dartlab.groupHealth()                 # group health: network × financial ratios
-```
-### Modules
-DartLab exposes 100+ modules across 6 categories:
-```bash
-dartlab modules                      # list all modules
-dartlab modules --category finance   # filter by category
-dartlab modules --search dividend    # search by keyword
-```
-```python
-c.topics    # list all available topics for this company
-```
-Categories: `finance` (statements, ratios), `report` (dividend, governance, audit), `notes` (K-IFRS annotations), `disclosure` (narrative text), `analysis` (insights, rankings), `raw` (original parquets).
-### Charts & Visualization (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-c = dartlab.Company("005930")
-# one-liner Plotly charts
-dartlab.chart.revenue(c).show()          # revenue + operating margin combo
-dartlab.chart.cashflow(c).show()         # operating/investing/financing CF
-dartlab.chart.dividend(c).show()         # DPS + yield + payout ratio
-dartlab.chart.profitability(c).show()    # ROE, operating margin, net margin
-# auto-detect all available charts
-specs = dartlab.chart.auto_chart(c)
-dartlab.chart.chart_from_spec(specs[0]).show()
-# generic charts from any DataFrame
-dartlab.chart.line(c.dividend, y=["dps"])
-dartlab.chart.bar(df, x="year", y=["revenue", "operating_income"], stacked=True)
-```
-Data tools:
-```python
-dartlab.table.yoy_change(c.dividend, value_cols=["dps"])       # add YoY% columns
-dartlab.table.format_korean(c.BS, unit="백만원")                # 1.2조원, 350억원
-dartlab.table.summary_stats(c.dividend, value_cols=["dps"])     # mean/CAGR/trend
-dartlab.text.extract_keywords(narrative)                        # frequency-based keywords
-dartlab.text.sentiment_indicators(narrative)                     # positive/negative/risk
-```
-Install chart dependencies: `uv add "dartlab[charts]"`
-### Network — Affiliate Map (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-c = dartlab.Company("005930")
-# interactive vis.js graph in browser
-c.network().show()           # ego view (1 hop)
-c.network(hops=2).show()     # 2-hop neighborhood
-# DataFrame views
-c.network("members")     # group affiliates
-c.network("edges")       # investment/shareholder connections
-c.network("cycles")      # circular ownership paths
-# full market network
-dartlab.network().show()
-```
-### Market Scan (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-c = dartlab.Company("005930")
-# one company → market-wide
-c.governance()           # single company
-c.governance("all")      # full market DataFrame
-dartlab.governance()     # module-level scan
-dartlab.workforce()
-dartlab.capital()
-dartlab.debt()
-# screening & benchmarking
-dartlab.screen()         # multi-factor screening
-dartlab.benchmark()      # peer comparison
-dartlab.signal()         # change detection signals
-```
-### Market Data Collection (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-The Gather engine collects external market data as **Polars DataFrames** — timeseries by default. Every request goes through automatic fallback chains, circuit breaker isolation, and TTL caching. All methods are synchronous — async parallel execution is handled internally.
-```python
-import dartlab
-# OHLCV timeseries — adjusted prices, 6000+ trading days in a single request
-dartlab.price("005930")                         # KR: 1-year default, Polars DataFrame
-dartlab.price("005930", start="2015-01-01")     # custom range
-dartlab.price("AAPL", market="US")              # US via Yahoo Finance chart API
-dartlab.price("005930", snapshot=True)          # opt-in: current price snapshot
-# supply/demand flow timeseries (KR only)
-dartlab.flow("005930")                          # DataFrame (date, foreignNet, institutionNet, ...)
-# macro indicators — full wide DataFrame
-dartlab.macro()                                 # KR 12 indicators (CPI, rates, FX, production, ...)
-dartlab.macro("US")                             # US 25 indicators (GDP, CPI, Fed Funds, S&P500, ...)
-dartlab.macro("CPI")                            # single indicator (auto-detects KR)
-dartlab.macro("FEDFUNDS")                       # single indicator (auto-detects US)
-# consensus, news
-dartlab.consensus("005930")                     # target price & analyst opinion
-dartlab.news("삼성전자")                         # Google News RSS → DataFrame
-```
-**How data is collected — don't worry, it's safe:**
-| Source | Data | Method |
-|--------|------|--------|
-| Naver Chart API | KR OHLCV (adjusted prices) | `fchart.stock.naver.com` — 1 request per stock, max 6000 days |
-| Yahoo Finance v8 | US/Global OHLCV | `query2.finance.yahoo.com/v8/finance/chart` — public chart API |
-| ECOS (Bank of Korea) | KR macro indicators | Official API with user's own key |
-| FRED (St. Louis Fed) | US macro indicators | Official API with user's own key |
-| Naver Mobile API | Consensus, flow, sector PER | `m.stock.naver.com/api` — JSON endpoints |
-| FMP | Fallback for US history | Financial Modeling Prep API (optional) |
-**Safety infrastructure:**
-- **Rate limiting** — per-domain RPM caps (Naver 30, ECOS 30, FRED 120) with async queue
-- **Circuit breaker** — 3 consecutive failures → source disabled for 60s, half-open retry
-- **Fallback chains** — KR: naver → yahoo_direct → yahoo / US: yahoo_direct → fmp → yahoo
-- **Stale-while-revalidate** — returns cached data on failure, warns via `log.warning`
-- **User-Agent rotation** — randomized per request to avoid fingerprinting
-- **No silent failures** — all API errors logged at warning level, never swallowed
-- **No scraping** — all sources are public APIs or official data endpoints
-### Cross-Border Analysis (beta)
-> **Beta** — API may change after a warning. See [stability](docs/stability.md).
-```python
-c = dartlab.Company("005930")
-# keyword frequency across disclosure periods
-c.keywordTrend(keyword="AI")          # topic × period × keyword count
-c.keywordTrend()                      # all 54 built-in keywords
-# news headlines
-c.news()                              # recent 30 days
-dartlab.news("AAPL", market="US")     # US company news
-# global peer mapping (WICS → GICS sector)
-dartlab.crossBorderPeers("005930")    # → ["AAPL", "MSFT", "NVDA", "TSM", "AVGO"]
-# currency conversion (FRED-based)
-from dartlab.engines.common.finance import getExchangeRate, convertValue
-getExchangeRate("KRW")                # KRW/USD rate
-convertValue(1_000_000, "KRW", "USD") # → ~730.0
-# audit opinion normalization (KR/EN/JP → canonical code)
-from dartlab.engines.common.audit import normalizeAuditOpinion
-normalizeAuditOpinion("적정")          # → "unqualified"
-normalizeAuditOpinion("Qualified")     # → "qualified"
-```
-Disclosure gap detection runs automatically inside `c.insights` — flags mismatches between text changes and financial health (e.g. risk text surges while financials are stable).
-### Export (experimental)
-> **Experimental** — Breaking changes possible. Not for production.
-```bash
-dartlab excel "005930" -o samsung.xlsx
-```
-Install: `uv add "dartlab[ai]"` (Excel export is included in the AI extras).
-### Plugins
-```python
-dartlab.plugins()               # list loaded plugins
-dartlab.reload_plugins()        # rescan after installing a plugin
-```
-Plugins can extend DartLab with custom data sources, tools, or analysis engines. See `dartlab plugin create --help` for scaffolding.
-## EDGAR (US)
-Same `Company` interface, same account standardization pipeline, different data source. EDGAR data is auto-fetched from the SEC API — no pre-download needed:
-```python
-us = dartlab.Company("AAPL")
-us.sections                         # 10-K/10-Q sections with heading/body
-us.show("business")                 # business description
-us.show("10-K::item1ARiskFactors")  # risk factors
-us.BS                               # SEC XBRL balance sheet
-us.ratios                           # same 47 ratios
-us.diff("10-K::item7Mdna")          # MD&A text changes
-us.insights                         # 10-area grades (A~F)
-# analyst functions — auto-detect USD
-dartlab.valuation("AAPL")           # DCF + DDM + relative (USD)
-dartlab.forecast("AAPL")            # revenue forecast (USD)
-dartlab.simulation("AAPL")          # scenario simulation (US macro presets)
-```
-The interface is identical — same methods, same structure:
-```python
-# Korea (DART)                          # US (EDGAR)
-c = dartlab.Company("005930")           c = dartlab.Company("AAPL")
-c.sections                              c.sections
-c.show("businessOverview")              c.show("business")
-c.BS                                    c.BS
-c.ratios                                c.ratios
-c.diff("businessOverview")              c.diff("10-K::item7Mdna")
-c.insights.grades()                     c.insights.grades()
-```
-### DART vs EDGAR Namespaces
-|               | DART           | EDGAR          |
-|---------------|:--------------:|:--------------:|
-| `docs`        | ✓              | ✓              |
-| `finance`     | ✓              | ✓              |
-| `report`      | ✓ (28 API types) | ✗ (not applicable) |
-| `profile`     | ✓              | ✓              |
-DART has a `report` namespace with 28 structured disclosure APIs (dividend, governance, executive compensation, etc.). This does not exist in EDGAR — SEC filings are structured differently.
-**EDGAR topic naming**: Topics use `{formType}::{itemId}` format. Short aliases also work:
-```python
-us.show("10-K::item1Business")     # full form
-us.show("business")                # short alias
-us.show("risk")                    # → 10-K::item1ARiskFactors
-us.show("mdna")                    # → 10-K::item7Mdna
-```
-## AI Analysis
-> **Experimental** — the AI analysis layer and `analysis/` engines are under active development. APIs, output formats, and available tools may change between versions.
-> **Tip:** New to financial analysis or prefer natural language? Use `dartlab.ask()` — the AI assistant handles everything from data download to analysis. No coding knowledge required.
-DartLab includes a built-in AI analysis layer that feeds structured company data to LLMs. **No code required** — you can ask questions in plain language and DartLab handles everything: data selection, context assembly, and streaming the answer.
-```bash
-# terminal one-liner — no Python needed
-dartlab ask "삼성전자 재무건전성 분석해줘"
-```
-DartLab structures the data, selects relevant context (financials, insights, sector benchmarks), and lets the LLM explain:
-```
-$ dartlab ask "삼성전자 재무건전성 분석해줘"
-삼성전자의 재무건전성은 A등급입니다.
-▸ 부채비율 31.8% — 업종 평균(45.2%) 대비 양호
-▸ 유동비율 258.6% — 200% 안전 기준 상회
-▸ 이자보상배수 22.1배 — 이자 부담 매우 낮음
-▸ ROE 회복세: 1.6% → 10.2% (4분기 연속 개선)
-[데이터 출처: 2024Q4 사업보고서, dartlab insights 엔진]
-```
-For real-time market-wide disclosure questions (e.g. "최근 7일 수주공시 알려줘"), the AI uses your `OpenDART API key` to search recent filings directly. Store the key in project `.env` or via UI Settings.
-The 2-tier architecture (system-assembled context first, optional LLM tool calls second) means basic analysis works with any provider, while tool-calling providers (e.g. `openai`, `oauth-codex`, `codex`) can go deeper by requesting additional data mid-conversation.
-### Python API
-```python
-import dartlab
-# streams to stdout, returns full text
-answer = dartlab.ask("삼성전자 재무건전성 분석해줘")
-# provider + model override
-answer = dartlab.ask("삼성전자 분석", provider="openai", model="gpt-4o")
-# data filtering
-answer = dartlab.ask("삼성전자 핵심 포인트", include=["BS", "IS"])
-# analysis pattern (framework-guided)
-answer = dartlab.ask("삼성전자 분석", pattern="financial")
-# agent mode — LLM selects tools for deeper analysis
-answer = dartlab.chat("005930", "배당 추세를 분석하고 이상 징후를 찾아줘")
-```
-### CLI
-```bash
-# provider setup — free providers first
-dartlab setup              # list all providers
-dartlab setup gemini       # Google Gemini (free)
-dartlab setup groq         # Groq (free)
-# status
-dartlab status             # all providers (table view)
-dartlab status --cost      # cumulative token/cost stats
-# ask questions (streaming by default)
-dartlab ask "삼성전자 재무건전성 분석해줘"
-dartlab ask "AAPL risk analysis" -p ollama
-dartlab ask --continue "배당 추세는?"
-# auto-generate report
-dartlab report "삼성전자" -o report.md
-# web UI
-dartlab                    # open browser UI
-dartlab --help             # show all commands
-```
-<details>
-<summary>All CLI commands (16)</summary>
-| Category | Command | Description |
-|----------|---------|-------------|
-| Data | `show` | Open any topic by name |
-| Data | `search` | Find companies by name or code |
-| Data | `statement` | BS / IS / CF / SCE output |
-| Data | `sections` | Raw docs sections |
-| Data | `profile` | Company index and facts |
-| Data | `modules` | List all available modules |
-| AI | `ask` | Natural language question |
-| AI | `report` | Auto-generate analysis report |
-| Export | `excel` | Export to Excel (experimental) |
-| Collect | `collect` | Download / refresh / batch collect |
-| Collect | `collect --check` | Check freshness (new filings) |
-| Collect | `collect --incremental` | Incremental collect (missing only) |
-| Server | `ai` | Launch web UI (localhost:8400) |
-| Server | `share` | Tunnel sharing (ngrok / cloudflared) |
-| Server | `status` | Provider connection status |
-| Server | `setup` | Provider setup wizard |
-| MCP | `mcp` | Start MCP stdio server |
-| Plugin | `plugin` | Create / list plugins |
-</details>
-### Providers
-**Free API key providers** — sign up, paste the key, start analyzing:
-| Provider | Free Tier | Model | Setup |
-|----------|-----------|-------|-------|
-| `gemini` | Gemini 2.5 Pro/Flash free | Gemini 2.5 | `dartlab setup gemini` |
-| `groq` | 6K–30K TPM free | LLaMA 3.3 70B | `dartlab setup groq` |
-| `cerebras` | 1M tokens/day permanent | LLaMA 3.3 70B | `dartlab setup cerebras` |
-| `mistral` | 1B tokens/month free | Mistral Small | `dartlab setup mistral` |
-**Other providers:**
-| Provider | Auth | Cost | Tool Calling |
-|----------|------|------|:---:|
-| `oauth-codex` | ChatGPT subscription (Plus/Team/Enterprise) | Included in subscription | Yes |
-| `openai` | API key (`OPENAI_API_KEY`) | Pay-per-token | Yes |
-| `ollama` | Local install, no account needed | Free | Depends on model |
-| `codex` | Codex CLI installed locally | Free (uses your Codex session) | Yes |
-| `custom` | Any OpenAI-compatible endpoint | Varies | Varies |
-**Auto-fallback:** Set multiple free API keys and DartLab automatically switches to the next provider when one hits its rate limit. Use `provider="free"` to enable the fallback chain:
-```python
-dartlab.ask("삼성전자 분석", provider="free")
-```
-**Why no Claude provider?** Anthropic does not offer OAuth-based access. Without OAuth, there is no way to let users authenticate with their existing subscription — we would have to ask users to paste API keys, which goes against DartLab's frictionless design. If Anthropic adds OAuth support in the future, we will add a Claude provider. For now, Claude works through **MCP** (see below) — Claude Desktop, Claude Code, and Cursor can call DartLab's 60 tools directly.
-**`oauth-codex`** is the recommended provider — if you have a ChatGPT subscription, it works out of the box with no API keys. Run `dartlab setup oauth-codex` to authenticate.
-**Web UI (`dartlab`)** launches a browser-based chat interface for interactive analysis. This feature is currently **experimental** — we are evaluating the right scope and UX for visualization and collaborative features.
-Install AI dependencies: `uv add "dartlab[ai]"`
-### Project Settings (`.dartlab.yml`)
-```yaml
-company: 005930         # default company
-provider: openai        # default LLM provider
-model: gpt-4o           # default model
-verbose: false
-```
-## MCP — AI Assistant Integration
-DartLab includes a built-in [MCP](https://modelcontextprotocol.io/) server that exposes 60 tools (16 global + 44 per-company) to Claude Desktop, Claude Code, Cursor, and any MCP-compatible client.
-```bash
-uv add "dartlab[mcp]"
-```
-### Claude Desktop
-Add to `claude_desktop_config.json`:
-```json
-{
-  "mcpServers": {
-    "dartlab": {
-      "command": "uv",
-      "args": ["run", "dartlab", "mcp"]
-    }
-  }
-}
-```
-### Claude Code
-```bash
-claude mcp add dartlab -- uv run dartlab mcp
-```
-Or add to `~/.claude/settings.json`:
-```json
-{
-  "mcpServers": {
-    "dartlab": {
-      "command": "uv",
-      "args": ["run", "dartlab", "mcp"]
-    }
-  }
-}
-```
-### Cursor
-Add to `.cursor/mcp.json` with the same config format as Claude Desktop.
-### What's Available
-Once connected, your AI assistant can:
-- **Search** — find companies by name or code (`search_company`)
-- **Show** — read any disclosure topic (`show_topic`, `list_topics`, `diff_topic`)
-- **Finance** — balance sheet, income statement, cash flow, ratios (`get_financial_statements`, `get_ratios`)
-- **Analysis** — insights, sector ranking, valuation (`get_insight`, `get_ranking`)
-- **EDGAR** — same tools work for US companies (`stock_code: "AAPL"`)
-Auto-generate config for your platform:
-```bash
-dartlab mcp --config claude-desktop
-dartlab mcp --config claude-code
-dartlab mcp --config cursor
-```
-## OpenAPI — Raw Public APIs
-Use source-native wrappers when you want raw disclosure APIs directly.
-### OpenDart (Korea)
-> **Note:** `Company` does **not** require an API key — it uses pre-built datasets.
-> `OpenDart` uses the raw DART API and requires a key from [opendart.fss.or.kr](https://opendart.fss.or.kr) (free).
-> Recent filing-list AI questions across the whole market also use this key. In the UI, open Settings and manage `OpenDART API key` there.
-```python
-from dartlab import OpenDart
-d = OpenDart()
-d.search("카카오", listed=True)
-d.filings("삼성전자", "2024")
-d.finstate("삼성전자", 2024)
-d.report("삼성전자", "배당", 2024)
-```
-### OpenEdgar (US)
-> **No API key required.** SEC EDGAR is a public API — no registration needed.
-```python
-from dartlab import OpenEdgar
-e = OpenEdgar()
-e.search("Apple")
-e.filings("AAPL", forms=["10-K", "10-Q"])
-e.companyFactsJson("AAPL")
-```
-## Data
-**No manual setup required.** When you create a `Company`, dartlab automatically downloads the required data.
-| Dataset | Coverage | Size | Source |
-|---------|----------|------|--------|
-| DART docs | 2,500+ companies | ~8 GB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/docs) |
-| DART finance | 2,700+ companies | ~600 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/finance) |
-| DART report | 2,700+ companies | ~320 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/report) |
-| EDGAR | On-demand | — | SEC API (auto-fetched) |
-### 3-Step Data Pipeline
-```
-dartlab.Company("005930")
-  │
-  ├─ 1. Local cache ──── already have it? done (instant)
-  │
-  ├─ 2. HuggingFace ──── auto-download (~seconds, no key needed)
-  │
-  └─ 3. DART API ──────── collect with your API key (needs key)
-```
-If a company is not in HuggingFace, dartlab collects data directly from DART — this requires an API key:
-```bash
-dartlab setup dart-key
-```
-### Freshness — Automatic Update Detection
-DartLab uses a 3-layer freshness system to keep your local data current:
-| Layer | Method | Cost |
-|-------|--------|------|
-| L1 | HTTP HEAD → ETag comparison with HuggingFace | ~0.5s, few hundred bytes |
-| L2 | Local file age (90-day TTL fallback) | instant (local) |
-| L3 | DART API → `rcept_no` diff (requires API key) | 1 API call, ~1s |
-When you open a `Company`, dartlab checks if newer data exists. If a new disclosure was filed:
-```python
-c = dartlab.Company("005930")
-# [dartlab] ⚠ 005930 — 새 공시 2건 발견 (사업보고서 (2024.12))
-#   • 증분 수집: dartlab collect --incremental 005930
-#   • 또는 Python: c.update()
-c.update()  # incremental collect — only missing filings
-```
-```bash
-# CLI freshness check
-dartlab collect --check 005930         # single company
-dartlab collect --check                # scan all local companies (7 days)
-# incremental collect — only missing filings
-dartlab collect --incremental 005930   # single company
-dartlab collect --incremental          # all local companies with new filings
-```
-### Batch Collection (DART API)
-```bash
-dartlab collect --batch                    # all listed, missing only
-dartlab collect --batch -c finance 005930  # specific category + company
-dartlab collect --batch --mode all         # re-collect everything
-```
-## Try It Now
-### Live Demo (No Install)
-Try DartLab instantly — no Python, no terminal, no setup:
-**[→ Open Live Demo](https://huggingface.co/spaces/eddmpython/dartlab)** — enter a stock code, see financials immediately
-Or open a [Colab notebook](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) in your browser.
-### Marimo Notebooks
-> Data is automatically downloaded on first use. No setup required unless collecting new companies directly from DART.
-```bash
-uv add dartlab marimo
-marimo edit notebooks/marimo/dartCompany.py    # Korean company (DART)
-marimo edit notebooks/marimo/edgarCompany.py   # US company (EDGAR)
-marimo edit notebooks/marimo/aiAnalysis.py     # AI analysis examples
-```
-### Colab Notebooks
-**Showcase** (English — global audience):
-| Notebook | Topic |
-|---|---|
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) | **Quick Start** — analyze any company in 3 lines |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/02_financial_analysis.ipynb) | **Financial Analysis** — statements, time series, ratios |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/03_kr_us_compare.ipynb) | **Korea vs US** — Samsung vs Apple side-by-side |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/04_risk_diff.ipynb) | **Risk Diff** — track disclosure changes (Bloomberg can't) |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/05_sector_screening.ipynb) | **Sector Screening** — 8 presets, sector benchmarks |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/06_insight_anomaly.ipynb) | **Insight & Anomaly** — 10-area grading, 6 anomaly rules |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/07_network_governance.ipynb) | **Network & Governance** — corporate relationship graph |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/08_signal_trend.ipynb) | **Signal Trends** — 48-keyword disclosure monitoring |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/09_ai_analysis.ipynb) | **AI Analysis** — `dartlab.ask()` with 9 LLM providers |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/10_disclosure_deep_dive.ipynb) | **Disclosure Deep Dive** — sections architecture |
-<details>
-<summary>한국어 Tutorials</summary>
-| Notebook | Topic |
-|---|---|
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/getting-started/quickstart.ipynb) | **빠른 시작** — sections, show, trace, diff |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/02_financial_statements.ipynb) | **재무제표** — BS, IS, CF |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/04_ratios.ipynb) | **재무비율** — 47개 비율 |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/06_disclosure.ipynb) | **공시 텍스트** — sections 파싱 |
-| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/09_edgar.ipynb) | **EDGAR** — 미국 SEC |
-</details>
-## Documentation
-- Docs: https://eddmpython.github.io/dartlab/
-- Sections guide: https://eddmpython.github.io/dartlab/docs/getting-started/sections
-- Quick start: https://eddmpython.github.io/dartlab/docs/getting-started/quickstart
-- API overview: https://eddmpython.github.io/dartlab/docs/api/overview
-- Beginner guide (Korean): https://eddmpython.github.io/dartlab/blog/dartlab-easy-start/
-### Blog
-The [DartLab Blog](https://eddmpython.github.io/dartlab/blog/) covers practical disclosure analysis — how to read reports, interpret patterns, and spot risk signals. 120+ articles across three categories:
-- **Disclosure Systems** — structure and mechanics of DART/EDGAR filings
-- **Report Reading** — practical guide to audit reports, preliminary earnings, restatements
-- **Financial Interpretation** — financial statements, ratios, and disclosure signals
-## Stability
-| Tier | Scope |
-|------|-------|
-| **Stable** | DART Company (sections, show, trace, diff, BS/IS/CF, CIS, index, filings, profile), EDGAR Company core, valuation, forecast, simulation |
-| **Beta** | EDGAR power-user (SCE, notes, freq, coverage), insights, distress, ratios, timeseries, network, governance, workforce, capital, debt, chart/table/text tools, ask/chat, OpenDart, OpenEdgar, Server API, MCP, CLI subcommands |
-| **Experimental** | AI tool calling, export |
-| **Alpha** | Desktop App (Windows .exe) — functional but incomplete; Sections Viewer — not yet fully structured |
-See [docs/stability.md](docs/stability.md).
-## Contributing
-The project prefers **experiments before engine changes**. If you want to propose a parser or mapping change, validate it in `experiments/` first and bring the verified result back into the engine.
-- **Experiment folder**: `experiments/XXX_camelCaseName/` — each file must be independently runnable with actual results in its docstring
-- **Data contributions** (e.g. `accountMappings.json`, `sectionMappings.json`): only accepted when backed by experiment evidence — no manual bulk edits
-- Issues and PRs in Korean or English are both welcome
-## License
-MIT

pyproject.toml CHANGED Viewed

@@ -59,6 +59,7 @@ dependencies = [
     "orjson>=3.10.0,<4",
     "polars>=1.0.0,<2",
     "requests>=2.32.5,<3",
     "rich>=14.3.3,<15",
     "plotly>=5.0.0,<6",
     "mcp[cli]>=1.0",

     "orjson>=3.10.0,<4",
     "polars>=1.0.0,<2",
     "requests>=2.32.5,<3",
+    "prompt-toolkit>=3.0,<4",
     "rich>=14.3.3,<15",
     "plotly>=5.0.0,<6",
     "mcp[cli]>=1.0",

src/dartlab/ai/DEV.md CHANGED Viewed

@@ -1,5 +1,77 @@
 # AI Engine Development Guide
 ## Source Of Truth
 - 데이터 source-of-truth: `src/dartlab/core/registry.py`

 # AI Engine Development Guide
+## 설계 사상
+### dartlab AI는 무엇인가
+dartlab의 핵심 자산은 데이터 엔진이다. 전자공시 원본을 정규화하여 **전기간 비교가능 + 기업간 비교가능**한 구조로 만든 것이 dartlab의 존재 이유다. AI는 이 데이터 위에서 동작하는 **소비자**이지, 데이터를 대체하지 않는다.
+**LLM은 해석자이지 분석가가 아니다.**
+- 계산은 엔진이 한다 (ratios, timeseries, insights, valuation)
+- 판단은 엔진이 한다 (anomaly detection, scoring, red flags)
+- LLM은 엔진 결과를 받아서 **"왜"를 설명하고, 인과 관계를 서술하고, 사용자 질문에 답한다**
+이것이 dexter와의 근본적 차이다:
+- dexter: 데이터 없음. LLM이 외부 API를 호출해서 데이터를 수집하고 분석. LLM이 전부.
+- dartlab: 데이터 엔진이 전부. LLM은 정규화된 데이터를 읽고 해석하는 마지막 계층.
+### 2-Tier 아키텍처
+- **Tier 1 (시스템 주도)**: 질문 분류 → 엔진 계산 → 결과를 컨텍스트로 조립 → LLM에 한 번 전달. 모든 provider에서 동작. tool calling 불필요.
+- **Tier 2 (LLM 주도)**: Tier 1 결과를 보고 LLM이 "부족하다" 판단 → 도구 호출로 추가 탐색. tool calling 가능한 provider에서만 동작.
+Tier 1이 충분하면 LLM roundtrip은 1회다. 이것이 속도의 핵심이다.
+### 속도 원칙
+**LLM roundtrip을 줄이는 것이 속도다.**
+- 더 많은 데이터를 미리 조립해서 1회에 끝내는 것이 빠르다 (Tier 1 강화)
+- 도구 호출을 병렬화하는 것보다, 애초에 호출이 필요 없게 만드는 것이 빠르다
+- changes(공시 변화분 23%)를 컨텍스트에 미리 넣으면 "뭐가 바뀌었지?" 탐색 호출이 사라진다
+### dexter에서 흡수한 것
+| 패턴 | dexter 원본 | dartlab 적용 |
+|------|------------|-------------|
+| Scratchpad | 도구 결과 누적/토큰 관리 | `runtime/scratchpad.py` — 중복 호출 방지, 토큰 예산 |
+| SOUL.md | 분석 철학 주입 | `templates/analysisPhilosophy.py` — Palepu-Healy + CFA 사고 프레임 |
+| stripFieldsDeep | 도구 결과 필드 제거 | `context/pruning.py` — XBRL 메타데이터 재귀 제거 |
+| SKILL.md | 워크플로우 가이드 | `skills/catalog.py` — 8개 분석 스킬 (도구 비의존) |
+| 자율 에이전트 | 충분할 때까지 탐색 | `agentLoopAutonomous()` — report_mode Tier 2 |
+| 세션 메모리 | SQLite + 시간 감쇠 | `memory/store.py` — 분석 기록 영속 |
+### 흡수하지 않은 것
+- **데이터 소유 구조**: dexter는 외부 API로 데이터 수집. dartlab은 이미 데이터 엔진을 소유.
+- **단일 모델 의존**: dexter는 모든 판단을 LLM에 위임. dartlab은 엔진이 계산/판단하고 LLM은 해석만.
+- **meta-tool 패턴**: 도구 안에 도구를 넣는 구조. dartlab은 Super Tool 7개로 이미 해결.
+### 사용자 원칙
+- **접근성**: 종목코드 하나면 끝. `dartlab ask "005930" "영업이익률 추세는?"` 또는 `dartlab chat`으로 인터랙티브.
+- **신뢰성**: 숫자는 엔진이 계산한 원본. LLM이 숫자를 만들어내면 검증 레이어가 잡는다.
+- **투명성**: 어떤 데이터를 봤는지(includedEvidence), 어떤 도구를 썼는지(tool_call) 항상 노출.
+### 품질 검증 기준선 (2026-03-27)
+ollama qwen3:4b 기준 critical+high 35건 배치 결과:
+| 지표 | 값 | 비고 |
+|------|-----|------|
+| avgOverall | 7.33 | gemini fallback 수정 후 재측정 (수정 전 5.98) |
+| routeMatch | 1.00 | intent 분류 + 라우팅 완벽 |
+| moduleUtilization | 0.75 | 일부 eval 케이스 정합성 문제 포함 |
+| falseUnavailable | 0/35 | "데이터 없다" 거짓 응답 없음 |
+production 모델(openai/gemini) 측정은 API 키 확보 후 진행 예정. factual accuracy는 production 모델에서만 유의미.
+주요 failure taxonomy:
+- **runtime_error**: provider 설정 정합성 (해결됨)
+- **retrieval_failure**: eval 케이스 expectedModules와 실제 컨텍스트 빌더 매핑 간극
+- **generation_failure**: 소형 모델 한계 (production 모델에서 재측정 필요)
+---
 ## Source Of Truth
 - 데이터 source-of-truth: `src/dartlab/core/registry.py`

src/dartlab/ai/context/builder.py CHANGED Viewed

@@ -231,8 +231,7 @@ _CANDIDATE_ALIASES = {
 }
 _MARGIN_DRIVER_MARGIN_HINTS = frozenset({"영업이익률", "마진", "이익률", "margin"})
 _MARGIN_DRIVER_COST_HINTS = frozenset({"비용 구조", "원가 구조", "비용", "원가", "판관비", "매출원가"})
-_MARGIN_DRIVER_BUSINESS_HINTS = frozenset({"사업 변화", "사업변화", "사업 구조", "사업구조"})
-_RECENT_DISCLOSURE_BUSINESS_HINTS = frozenset({"사업 변화", "사업변화", "사업 구조", "사업구조"})
 _PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
@@ -372,13 +371,13 @@ def _has_margin_driver_pattern(question: str) -> bool:
     return (
         _question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
         and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
-        and _question_has_any(question, _MARGIN_DRIVER_BUSINESS_HINTS)
     )
 def _has_recent_disclosure_business_pattern(question: str) -> bool:
     lowered = question.lower()
-    return "최근 공시" in lowered and _question_has_any(question, _RECENT_DISCLOSURE_BUSINESS_HINTS)
 def _resolve_direct_hint_modules(question: str) -> list[str]:
@@ -957,6 +956,61 @@ def _build_sections_context(
     return result
 def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
     if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
         return None
@@ -1203,6 +1257,14 @@ def _build_compact_context_modules_inner(
                 if included_name not in included:
                     included.append(included_name)
     direct_sections = _build_direct_module_context(
         company,
         candidate_plan.get("direct", []),

 }
 _MARGIN_DRIVER_MARGIN_HINTS = frozenset({"영업이익률", "마진", "이익률", "margin"})
 _MARGIN_DRIVER_COST_HINTS = frozenset({"비용 구조", "원가 구조", "비용", "원가", "판관비", "매출원가"})
+_BUSINESS_CHANGE_HINTS = frozenset({"사업 변화", "사업변화", "사업 구조", "사업구조"})
 _PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
     return (
         _question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
         and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
+        and _question_has_any(question, _BUSINESS_CHANGE_HINTS)
     )
 def _has_recent_disclosure_business_pattern(question: str) -> bool:
     lowered = question.lower()
+    return "최근 공시" in lowered and _question_has_any(question, _BUSINESS_CHANGE_HINTS)
 def _resolve_direct_hint_modules(question: str) -> list[str]:
     return result
+def _build_changes_context(company: Any, *, compact: bool = True) -> str:
+    """sections 변화 요약을 LLM 컨텍스트용 마크다운으로 변환.
+    전체 sections(97MB) 대신 변화분(23%)만 요약하여 제공.
+    LLM이 추가 도구 호출 없이 "무엇이 바뀌었는지" 즉시 파악 가능.
+    """
+    docs = getattr(company, "docs", None)
+    sections = getattr(docs, "sections", None)
+    if sections is None or not hasattr(sections, "changeSummary"):
+        return ""
+    try:
+        summary = sections.changeSummary(topN=8 if compact else 15)
+    except (AttributeError, TypeError, ValueError, pl.exceptions.PolarsError):
+        return ""
+    if summary is None or summary.is_empty():
+        return ""
+    lines = ["\n## 공시 변화 요약"]
+    lines.append("| topic | 변화유형 | 건수 | 평균크기변화 |")
+    lines.append("|-------|---------|------|------------|")
+    for row in summary.iter_rows(named=True):
+        topic = row.get("topic", "")
+        changeType = row.get("changeType", "")
+        count = row.get("count", 0)
+        avgDelta = row.get("avgDelta", 0)
+        sign = "+" if avgDelta and avgDelta > 0 else ""
+        lines.append(f"| {topic} | {changeType} | {count} | {sign}{avgDelta} |")
+    # 최근 기간 주요 변화 미리보기
+    try:
+        changes = sections.changes()
+    except (AttributeError, TypeError, ValueError, pl.exceptions.PolarsError):
+        changes = None
+    if changes is not None and not changes.is_empty():
+        # 가장 최근 기간 전환에서 structural/appeared 변화만 발췌
+        latestPeriod = changes.get_column("toPeriod").max()
+        recent = changes.filter(
+            (pl.col("toPeriod") == latestPeriod) & pl.col("changeType").is_in(["structural", "appeared"])
+        )
+        if not recent.is_empty():
+            lines.append(f"\n### 최근 주요 변화 ({latestPeriod})")
+            for row in recent.head(5 if compact else 10).iter_rows(named=True):
+                topic = row.get("topic", "")
+                ct = row.get("changeType", "")
+                preview = row.get("preview", "")
+                if preview:
+                    preview = preview[:120] + "..." if len(preview) > 120 else preview
+                lines.append(f"- **{topic}** [{ct}]: {preview}")
+    return "\n".join(lines)
 def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
     if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
         return None
                 if included_name not in included:
                     included.append(included_name)
+    # 변화 컨텍스트 — sections 변화분만 LLM에 전달 (roundtrip 감소)
+    if route in {"sections", "hybrid"}:
+        changes_context = _build_changes_context(company, compact=compact)
+        if changes_context:
+            modules_dict["_changes"] = changes_context
+            if "_changes" not in included:
+                included.append("_changes")
     direct_sections = _build_direct_module_context(
         company,
         candidate_plan.get("direct", []),

src/dartlab/ai/context/pruning.py ADDED Viewed

	@@ -0,0 +1,95 @@

+"""도구 결과 필드 pruning — LLM에 불필요한 컬럼/필드 재귀 제거.
+dexter의 stripFieldsDeep 패턴을 Python에 적용.
+토큰 절약 + 분석 관련성 향상.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+# LLM 분석에 불필요한 필드 — 재귀적으로 제거
+_STRIP_FIELDS: frozenset[str] = frozenset(
+    {
+        # XBRL 메타데이터
+        "concept_id",
+        "xbrl_context_id",
+        "instant",
+        "member",
+        "dimension",
+        "label_ko_raw",
+        # 공시 메타데이터
+        "acceptance_number",
+        "rcept_no",
+        "filing_date",
+        "report_code",
+        "reprt_code",
+        "corp_cls",
+        "corp_code",
+        # 기술적 식별자
+        "sj_div",
+        "ord",
+        "data_rank",
+        "source_file",
+        "source_path",
+        "sourceBlockOrder",
+        # 중복/내부용
+        "account_id_raw",
+        "account_nm_raw",
+        "currency",
+    }
+)
+# 모듈별 추가 제거 필드
+_MODULE_STRIP: dict[str, frozenset[str]] = {
+    "finance": frozenset({"bsns_year", "sj_nm", "stock_code", "fs_div", "fs_nm"}),
+    "explore": frozenset({"blockHash", "rawHtml", "charCount"}),
+    "report": frozenset({"rcept_no", "corp_code", "corp_cls"}),
+}
+def pruneToolResult(toolName: str, result: str, *, maxChars: int = 8000) -> str:
+    """도구 결과 문자열에서 불필요 필드를 제거."""
+    if not result or len(result) < 100:
+        return result
+    # JSON 파싱 시도
+    try:
+        data = json.loads(result)
+    except (json.JSONDecodeError, ValueError):
+        # JSON이 아니면 그대로 반환 (마크다운 테이블 등)
+        return result[:maxChars] if len(result) > maxChars else result
+    # 모듈별 추가 필드 결정
+    category = _resolveCategory(toolName)
+    extra = _MODULE_STRIP.get(category, frozenset())
+    stripFields = _STRIP_FIELDS | extra
+    pruned = _pruneValue(data, stripFields, depth=0)
+    text = json.dumps(pruned, ensure_ascii=False, indent=2, default=str)
+    if len(text) > maxChars:
+        return text[:maxChars] + "\n... (pruned+truncated)"
+    return text
+def _pruneValue(value: Any, stripFields: frozenset[str], depth: int) -> Any:
+    """재귀적 필드 제거."""
+    if depth > 8:
+        return value
+    if isinstance(value, dict):
+        return {k: _pruneValue(v, stripFields, depth + 1) for k, v in value.items() if k not in stripFields}
+    if isinstance(value, list):
+        return [_pruneValue(item, stripFields, depth + 1) for item in value]
+    return value
+def _resolveCategory(toolName: str) -> str:
+    """도구 이름에서 카테고리 추출."""
+    if toolName in ("finance", "get_data", "compute_ratios"):
+        return "finance"
+    if toolName in ("explore", "show", "search_data"):
+        return "explore"
+    if toolName in ("report", "get_report"):
+        return "report"
+    return ""

src/dartlab/ai/conversation/prompts.py CHANGED Viewed

@@ -36,6 +36,14 @@ from .templates.analysis_rules import (
 from .templates.analysis_rules import (
     TOPIC_PROMPTS as _TOPIC_PROMPTS,
 )
 from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
 from .templates.self_critique import (
     SELF_CRITIQUE_PROMPT,
@@ -43,8 +51,6 @@ from .templates.self_critique import (
 from .templates.self_critique import (
     SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
 )
-# ── 템플릿 데이터 임포트 ──────────────────────────────────
 from .templates.system_base import (
     EDGAR_SUPPLEMENT_EN,
     EDGAR_SUPPLEMENT_KR,
@@ -63,6 +69,19 @@ _PLUGIN_SYSTEM_PROMPT = """
 - 분석 중 플러그인 추천 힌트가 제공되면, 답변 끝에 자연스럽게 안내하세요.
 """
 # ══════════════════════════════════════
 # 질문 분류
 # ══════════════════════════════════════
@@ -235,7 +254,7 @@ def build_system_prompt_parts(
     if compact:
         base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
-        static_parts: list[str] = []
         dynamic_parts: list[str] = []
         benchmark_key = _match_sector(sector) if sector else None
@@ -259,7 +278,11 @@ def build_system_prompt_parts(
             if qt in _FEW_SHOT_COMPACT:
                 static_parts.append(_FEW_SHOT_COMPACT[qt])
-        # 동적: report_mode + 플러그인
         if report_mode:
             dynamic_parts.append(_REPORT_PROMPT_COMPACT)
@@ -284,10 +307,10 @@ def build_system_prompt_parts(
         base = SYSTEM_PROMPT_EN
     if not allow_tools:
         base = _strip_tool_guidance(base)
-    static_parts = []
     dynamic_parts = []
-    # 정적: 벤치마크 + 토픽 + 교차검증 + Few-shot
     benchmark_key = _match_sector(sector) if sector else None
     if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
         static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
@@ -314,7 +337,11 @@ def build_system_prompt_parts(
         edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
         static_parts.append(edgar_supp)
-    # 동적: report_mode + 플러그인
     if report_mode:
         dynamic_parts.append(_REPORT_PROMPT)

 from .templates.analysis_rules import (
     TOPIC_PROMPTS as _TOPIC_PROMPTS,
 )
+# ── 템플릿 데이터 임포트 ──────────────────────────────────
+from .templates.analysisPhilosophy import (
+    ANALYSIS_PHILOSOPHY_COMPACT as _PHILOSOPHY_COMPACT,
+)
+from .templates.analysisPhilosophy import (
+    ANALYSIS_PHILOSOPHY_KR as _PHILOSOPHY_KR,
+)
 from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
 from .templates.self_critique import (
     SELF_CRITIQUE_PROMPT,
 from .templates.self_critique import (
     SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
 )
 from .templates.system_base import (
     EDGAR_SUPPLEMENT_EN,
     EDGAR_SUPPLEMENT_KR,
 - 분석 중 플러그인 추천 힌트가 제공되면, 답변 끝에 자연스럽게 안내하세요.
 """
+# ── 스킬 매칭 헬퍼 ──────────────────────────────────
+def _matchSkillSafe(questionType: str | None, qTypes: list[str]) -> Any:  # best-effort skill lookup: returns None instead of raising on any Exception
+    """스킬 매칭 (import 실패 시 None)."""  # i.e. "skill matching (None when the import fails)"
+    try:
+        from dartlab.ai.skills.registry import matchSkill  # imported lazily so an import failure is caught by the except below
+        return matchSkill("", questionType=questionType or (qTypes[0] if qTypes else None))  # explicit questionType wins; otherwise first entry of qTypes, else None
+    except Exception:  # NOTE(review): this also hides errors raised inside matchSkill itself, not only import failures
+        return None
 # ══════════════════════════════════════
 # 질문 분류
 # ══════════════════════════════════════
     if compact:
         base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
+        static_parts: list[str] = [_PHILOSOPHY_COMPACT]
         dynamic_parts: list[str] = []
         benchmark_key = _match_sector(sector) if sector else None
             if qt in _FEW_SHOT_COMPACT:
                 static_parts.append(_FEW_SHOT_COMPACT[qt])
+        # 동적: skill + report_mode + 플러그인
+        _skill = _matchSkillSafe(question_type, q_types)
+        if _skill:
+            dynamic_parts.append(_skill.toPrompt())
         if report_mode:
             dynamic_parts.append(_REPORT_PROMPT_COMPACT)
         base = SYSTEM_PROMPT_EN
     if not allow_tools:
         base = _strip_tool_guidance(base)
+    static_parts = [_PHILOSOPHY_KR]
     dynamic_parts = []
+    # 정적: 철학 + 벤치마크 + 토픽 + 교차검증 + Few-shot
     benchmark_key = _match_sector(sector) if sector else None
     if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
         static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
         edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
         static_parts.append(edgar_supp)
+    # 동적: skill + report_mode + 플러그인
+    _skill = _matchSkillSafe(question_type, q_types)
+    if _skill:
+        dynamic_parts.append(_skill.toPrompt())
     if report_mode:
         dynamic_parts.append(_REPORT_PROMPT)

src/dartlab/ai/conversation/templates/analysisPhilosophy.py ADDED Viewed

	@@ -0,0 +1,57 @@

+"""분석 철학 — Palepu-Healy + CFA 프레임워크 기반 사고 프레임.
+기존 system_base.py의 7단계 프레임워크는 "어떻게 분석하라"(절차).
+이 철학은 "어떤 관점으로 보라"(사고 프레임)를 주입한다.
+dexter의 SOUL.md 패턴을 dartlab에 적용.
+"""
+from __future__ import annotations
+ANALYSIS_PHILOSOPHY_KR = """\
+## 분석 철학
+### 원칙 1: 숫자 뒤의 이야기를 읽어라
+재무제표는 경영 의사결정의 결과물이다. 수치 변화를 보면 "왜?"를 반드시 추적하라.
+매출이 올랐다면 → 물량인가, 단가인가, 믹스 변화인가?
+이익률이 떨어졌다면 → 원가인가, 판관비인가, 일회성인가?
+### 원칙 2: 이익의 질을 의심하라
+회계 이익과 현금 이익은 다르다.
+- 영업CF가 순이익을 지속적으로 하회하면 발생주의 이익을 의심하라
+- 운전자본 변화, 감가상각 대비 CAPEX, 자본화 정책을 확인하라
+- Accrual Ratio가 높으면 이익의 지속가능성에 물음표를 붙여라
+### 원칙 3: 구조를 분해하라
+- ROE는 DuPont으로 분해: 수익성 × 효율성 × 레버리지
+- 매출은 부문별, 지역별, 제품별로 분해
+- 비용은 성격별(원재료/인건비/감가)로 분해
+- 합산 숫자만 보면 구조 변화를 놓친다
+### 원칙 4: 교차검증하라
+- 공시 서술과 재무 수치가 일치하는지 확인
+- 경영진 코멘트와 실제 자본 배분이 부합하는지 확인
+- 부문 합산과 연결 수치가 정합하는지 확인
+- 불일치가 있으면 명시적으로 지적하라
+### 원칙 5: 시간축으로 판단하라
+- 단일 분기 스냅샷이 아니라 3~5년 추세로 판단
+- 일회성과 반복성을 분리
+- 성장이 유기적인지 인수에 의한 것인지 구분
+- 미래 추정은 과거 추세의 연장이 아니라 구조적 변화를 반영
+### 원칙 6: 리스크를 먼저 찾아라
+- "이 회사가 왜 좋은가"보다 "무엇이 잘못될 수 있는가"를 먼저 탐색
+- 감사의견 변화, 특수관계자 거래, 회계정책 변경을 주시
+- 부채 만기 구조와 이자보상배율을 함께 확인
+- 집중 리스크(매출처, 공급처, 지역)를 파악
+"""
+ANALYSIS_PHILOSOPHY_COMPACT = """\
+## 분석 원칙
+1. 숫자 뒤의 "왜?"를 추적 (매출=물량×단가×믹스, 비용=원가+판관비)
+2. 이익의 질 의심 (CF vs NI, Accrual Ratio, 운전자본 변화)
+3. DuPont/부문/성격별 분해 — 합산만 보면 구조 변화를 놓침
+4. 공시 서술 ↔ 재무 수치 교차검증 — 불일치 시 명시적 지적
+5. 3~5년 추세 판단 — 일회성 vs 반복성 분리
+6. "무엇이 잘못될 수 있는가?" 먼저 탐색 — 리스크 선행
+"""

src/dartlab/ai/eval/batchResults/batch_ollama_20260327_124945.jsonl ADDED Viewed

	@@ -0,0 +1,35 @@

+{"caseId": "researchGather.structure.recentDisclosures", "persona": "research_gather", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.costByNature.summary", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.audit.redFlags", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.dividend.sustainability", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.downside.risks", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.distress.sdi", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.margin.drivers", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.segments.lgchem", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.evidence.recentDisclosures", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.ambiguous.costStructure", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 5.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 4.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 4.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.profitMargin.context", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.growth.cashflowTrend", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.growth.futurePlan", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.growth.revenueGrowth", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.valuation.perComparison", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.valuation.intrinsicValue", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.valuation.roe", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.report.majorHolder", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.report.executivePay", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.context.evidenceCitation", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "businessOwner.context.riskFactors", "persona": "business_owner", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.context.disclosureChange", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.notes.rndExpense", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.notes.tangibleAsset", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.notes.segmentDetail", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.edge.financialCompany", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.cost.rndRatio", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.cost.opexBreakdown", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "investor.followup.deeperDividend", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "analyst.followup.whyMarginDrop", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["clarification_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
+{"caseId": "accountant.stability.debtAnalysis", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}

src/dartlab/ai/eval/batchResults/batch_ollama_20260327_131602.jsonl ADDED Viewed

	@@ -0,0 +1,4 @@

+{"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.0, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure"], "answerLength": 0, "timestamp": "20260327_131602"}
+{"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 8.727272727272727, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.8181818181818182, "failureTypes": [], "answerLength": 739, "timestamp": "20260327_131602"}
+{"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 10.083333333333332, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 687, "timestamp": "20260327_131602"}
+{"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 918, "timestamp": "20260327_131602"}

src/dartlab/ai/eval/batchResults/batch_ollama_20260327_132810.jsonl ADDED Viewed

	@@ -0,0 +1,11 @@

+{"caseId": "analyst.margin.drivers", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.083333333333334, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 186, "timestamp": "20260327_132810"}
+{"caseId": "analyst.segments.lgchem", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.25, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 407, "timestamp": "20260327_132810"}
+{"caseId": "analyst.evidence.recentDisclosures", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 310, "timestamp": "20260327_132810"}
+{"caseId": "analyst.growth.futurePlan", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.5, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 319, "timestamp": "20260327_132810"}
+{"caseId": "analyst.valuation.perComparison", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_132810"}
+{"caseId": "analyst.valuation.roe", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 10.537878787878789, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 375, "timestamp": "20260327_132810"}
+{"caseId": "analyst.context.evidenceCitation", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.916666666666668, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 804, "timestamp": "20260327_132810"}
+{"caseId": "analyst.notes.rndExpense", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.291666666666666, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 61, "timestamp": "20260327_132810"}
+{"caseId": "analyst.notes.segmentDetail", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_132810"}
+{"caseId": "analyst.cost.opexBreakdown", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.0, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 235, "timestamp": "20260327_132810"}
+{"caseId": "analyst.followup.whyMarginDrop", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 10.333333333333334, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["clarification_failure", "retrieval_failure"], "answerLength": 872, "timestamp": "20260327_132810"}

src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_124945.md ADDED Viewed

	@@ -0,0 +1,21 @@

+# Eval 진단 리포트 — 2026-03-27 12:49
+## 약점 유형 (하위 점수)
+| 유형 | 평균 점수 | 케이스 수 | 주요 실패 |
+|------|---------|---------|---------|
+| unknown | 5.98 | 35 | runtime_error, retrieval_failure, empty_answer |
+# 개선 계획 (Remediation)
+| 우선순위 | Failure | 대상 파일 | 설명 | 영향도 |
+|---------|---------|----------|------|-------|
+| P1 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES 매핑에 모듈 추가 (발생 20회) | high |
+| P3 | generation_failure | `engines/ai/conversation/templates/analysis_rules.py` | 분석 규칙에 few-shot 예시 추가 (발생 2회) | medium |
+| P4 | clarification_failure | `engines/ai/conversation/system_base.py` | clarification 정책 조건 수정 (발생 1회) | low |
+| P5 | empty_answer | `(매핑 없음)` | 새 failure 유형 — 매핑 추가 필요 (발생 15회) | unknown |
+| P5 | runtime_error | `(매핑 없음)` | 새 failure 유형 — 매핑 추가 필요 (발생 35회) | unknown |
+**즉시 조치 필요**: 1건
+- [retrieval_failure] → `engines/ai/context/finance_context.py`

src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_131602.md ADDED Viewed

	@@ -0,0 +1,15 @@

+# Eval 진단 리포트 — 2026-03-27 13:16
+## 약점 유형 (하위 점수)
+| 유형 | 평균 점수 | 케이스 수 | 주요 실패 |
+|------|---------|---------|---------|
+| unknown | 7.33 | 4 | generation_failure, retrieval_failure |
+# 개선 계획 (Remediation)
+| 우선순위 | Failure | 대상 파일 | 설명 | 영향도 |
+|---------|---------|----------|------|-------|
+| P3 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES 매핑에 모듈 추가 (발생 1회) | high |
+| P4 | generation_failure | `engines/ai/conversation/templates/analysis_rules.py` | 분석 규칙에 few-shot 예시 추가 (발생 1회) | medium |

src/dartlab/ai/memory/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+"""세션 간 분석 메모리 — SQLite 기반.
+종목별 분석 히스토리를 영속하여 재분석 시 이전 맥락을 활용한다.
+"""
+from dartlab.ai.memory.store import AnalysisMemory
+__all__ = ["AnalysisMemory"]

src/dartlab/ai/memory/store.py ADDED Viewed

	@@ -0,0 +1,154 @@

+"""분석 메모리 저장소 — SQLite 기반 세션 간 영속.
+Company 객체(200~500MB)는 저장하지 않는다.
+stockCode + 시점 + 질문 요약 + 결과 요약만 저장하여 메모리 안전.
+"""
+from __future__ import annotations
+import sqlite3
+import time
+from dataclasses import dataclass
+from pathlib import Path
+_DB_FILENAME = "analysisMemory.db"
+_MAX_DB_SIZE_MB = 50
+_MAX_SUMMARY_CHARS = 500
+# 싱글턴 인스턴스
+_instance: AnalysisMemory | None = None
+@dataclass(frozen=True)
+class MemoryRecord:  # immutable record type built by AnalysisMemory.recallForStock from one table row
+    """저장된 분석 기록."""  # i.e. "a stored analysis record"
+    stockCode: str  # value of the stockCode column
+    question: str  # question text (saveAnalysis stores at most the first 200 characters)
+    questionType: str
+    resultSummary: str  # stored summary (saveAnalysis truncates it to _MAX_SUMMARY_CHARS)
+    timestamp: float  # time.time() value recorded when the row was saved
+    grade: str | None = None  # None when the stored grade column is empty
class AnalysisMemory:
    """SQLite-backed store of per-stock analysis history.

    Only short text fields are persisted (stock code, question, question
    type, result summary, grade) together with a timestamp. The database
    connection is opened lazily on first use.
    """

    def __init__(self, dbPath: Path | None = None, *, maxSizeMb: float | None = None) -> None:
        """Remember where the database lives without opening it.

        Args:
            dbPath: SQLite file to use. Defaults to
                ``~/.dartlab/<_DB_FILENAME>``.
            maxSizeMb: File-size cap in MiB above which old rows are pruned.
                Defaults to the module-level ``_MAX_DB_SIZE_MB``.
        """
        if dbPath is None:
            dbPath = Path.home() / ".dartlab" / _DB_FILENAME
        self._dbPath = dbPath
        self._maxSizeMb = _MAX_DB_SIZE_MB if maxSizeMb is None else maxSizeMb
        self._conn: sqlite3.Connection | None = None

    def _ensureDb(self) -> sqlite3.Connection:
        """Return the open connection, creating file, table and indexes on first call."""
        if self._conn is not None:
            return self._conn
        self._dbPath.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self._dbPath), timeout=5)
        conn.execute(
            """CREATE TABLE IF NOT EXISTS analysis (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                stockCode TEXT NOT NULL,
                question TEXT NOT NULL,
                questionType TEXT DEFAULT '',
                resultSummary TEXT DEFAULT '',
                grade TEXT DEFAULT '',
                timestamp REAL NOT NULL
            )"""
        )
        conn.execute("CREATE INDEX IF NOT EXISTS idx_stock ON analysis(stockCode)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON analysis(timestamp)")
        conn.commit()
        self._conn = conn
        return conn

    def saveAnalysis(
        self,
        stockCode: str,
        question: str,
        questionType: str = "",
        resultSummary: str = "",
        grade: str | None = None,
    ) -> None:
        """Insert one analysis row, then prune the database if it grew too large.

        The question is cut to 200 characters and the summary to
        ``_MAX_SUMMARY_CHARS``; a missing grade is stored as an empty string.
        """
        conn = self._ensureDb()
        summary = resultSummary[:_MAX_SUMMARY_CHARS] if resultSummary else ""
        conn.execute(
            "INSERT INTO analysis (stockCode, question, questionType, resultSummary, grade, timestamp) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (stockCode, question[:200], questionType, summary, grade or "", time.time()),
        )
        conn.commit()
        self._enforceSizeLimit(conn)

    def recallForStock(
        self,
        stockCode: str,
        limit: int = 5,
        decayDays: int = 90,
    ) -> list[MemoryRecord]:
        """Return the newest rows for ``stockCode`` saved within ``decayDays`` days.

        Args:
            stockCode: Stock code to look up.
            limit: Maximum number of rows returned (newest first).
            decayDays: Rows older than this many days are ignored.
        """
        conn = self._ensureDb()
        cutoff = time.time() - (decayDays * 86400)
        rows = conn.execute(
            "SELECT stockCode, question, questionType, resultSummary, timestamp, grade "
            "FROM analysis WHERE stockCode = ? AND timestamp > ? "
            "ORDER BY timestamp DESC LIMIT ?",
            (stockCode, cutoff, limit),
        ).fetchall()
        return [
            MemoryRecord(
                stockCode=r[0],
                question=r[1],
                questionType=r[2],
                resultSummary=r[3],
                timestamp=r[4],
                grade=r[5] or None,  # empty string in the table means "no grade"
            )
            for r in rows
        ]

    def toPromptContext(self, stockCode: str) -> str:
        """Render the recalled rows as a markdown bullet list ("" when there are none)."""
        import datetime  # local import, hoisted out of the per-record loop

        records = self.recallForStock(stockCode)
        if not records:
            return ""
        lines = ["## 이전 분석 기록"]
        for r in records:
            dt = datetime.datetime.fromtimestamp(r.timestamp).strftime("%Y-%m-%d")
            grade_str = f" [등급: {r.grade}]" if r.grade else ""
            lines.append(f"- **{dt}** ({r.questionType}){grade_str}: {r.question}")
            if r.resultSummary:
                lines.append(f"  → {r.resultSummary[:200]}")
        return "\n".join(lines)

    def _enforceSizeLimit(self, conn: sqlite3.Connection) -> None:
        """Delete the 100 oldest rows and compact the file once it exceeds the cap.

        Best-effort: if the database file cannot be stat'ed, nothing is done.
        """
        try:
            dbSizeMb = self._dbPath.stat().st_size / (1024 * 1024)
        except OSError:
            return
        if dbSizeMb <= self._maxSizeMb:
            return
        conn.execute(
            "DELETE FROM analysis WHERE id IN (SELECT id FROM analysis ORDER BY timestamp ASC LIMIT 100)"
        )
        # The DELETE opened an implicit transaction and SQLite refuses to
        # VACUUM inside one, so the delete must be committed first.
        conn.commit()
        conn.execute("VACUUM")

    def close(self) -> None:
        """Close the connection if one is open; a later call reopens it lazily."""
        if self._conn:
            self._conn.close()
            self._conn = None
+def getMemory() -> AnalysisMemory:  # accessor for the module-level shared instance
+    """싱글턴 메모리 인스턴스."""  # i.e. "singleton memory instance"
+    global _instance
+    if _instance is None:  # first call: build the instance with the default dbPath
+        _instance = AnalysisMemory()
+    return _instance

src/dartlab/ai/memory/summarizer.py ADDED Viewed

	@@ -0,0 +1,55 @@

+"""분석 결과 요약기 — 규칙 기반 (LLM 호출 없이).
+LLM 답변에서 저장용 요약을 추출한다.
+"""
+from __future__ import annotations
+import re
+def summarizeResponse(response: str, maxChars: int = 500) -> str:  # regex/paragraph based summary; result is passed through _cleanText
+    """LLM 응답에서 핵심 요약 추출."""  # i.e. "extract the key summary from an LLM response"
+    if not response:
+        return ""
+    # 1. Try to pull out a section whose "#"/"##" heading starts with 종합/결론/요약/판단/Bull/강점
+    conclusionMatch = re.search(
+        r"(?:##?\s*(?:종합|결론|요약|판단|Bull|강점).*?\n)(.*?)(?:\n##|\Z)",
+        response,
+        re.DOTALL,
+    )
+    if conclusionMatch:
+        text = conclusionMatch.group(1).strip()  # group 1 = text after the heading line, up to the next "\n##" or end of input
+        return _cleanText(text, maxChars)
+    # 2. Otherwise fall back to the last paragraph
+    paragraphs = [p.strip() for p in response.split("\n\n") if p.strip()]
+    if paragraphs:
+        lastParagraph = paragraphs[-1]
+        # Prefer the last paragraph that is neither a table nor a code block
+        for p in reversed(paragraphs):
+            if not p.startswith("|") and not p.startswith("```"):
+                return _cleanText(p, maxChars)
+        return _cleanText(lastParagraph, maxChars)  # every paragraph was a table/code block: use the last one anyway
+    return _cleanText(response, maxChars)
+def extractGrade(response: str) -> str | None:  # returns the captured grade token, or None when the pattern is absent
+    """응답에서 등급 정보 추출."""  # i.e. "extract grade information from the response"
+    # Matches patterns such as "종합 등급: B+"
+    gradeMatch = re.search(r"종합\s*(?:등급|점수)\s*[:：]\s*([A-F][+-]?)", response)
+    if gradeMatch:
+        return gradeMatch.group(1)  # a letter A-F with an optional trailing + or -
+    return None
+def _cleanText(text: str, maxChars: int) -> str:
+    """마크다운 정리 + 길이 제한."""
+    # 마크다운 헤더, 볼드, 이모지 제거
+    cleaned = re.sub(r"[#*_`]", "", text)
+    cleaned = re.sub(r"\s+", " ", cleaned).strip()
+    if len(cleaned) > maxChars:
+        return cleaned[: maxChars - 3] + "..."
+    return cleaned

src/dartlab/ai/providers/oauth_codex.py CHANGED Viewed

@@ -29,22 +29,68 @@ log = logging.getLogger(__name__)
 CODEX_API_BASE = "https://chatgpt.com/backend-api"
 CODEX_RESPONSES_PATH = "/codex/responses"
-AVAILABLE_MODELS = [
     "gpt-5.4",
-    "gpt-5.3",
     "gpt-5.3-codex",
-    "gpt-5.2",
     "gpt-5.2-codex",
-    "gpt-5.1",
-    "gpt-5.1-codex",
-    "gpt-5.1-codex-mini",
-    "o3",
-    "o4-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-4.1-nano",
 ]
 class ChatGPTOAuthError(Exception):
     """ChatGPT OAuth provider 에러 — action 필드로 사용자 대응 안내."""

 CODEX_API_BASE = "https://chatgpt.com/backend-api"
 CODEX_RESPONSES_PATH = "/codex/responses"
+_BUNDLED_MODELS = [
     "gpt-5.4",
     "gpt-5.3-codex",
     "gpt-5.2-codex",
+    "gpt-5.1-codex-max",
 ]
+_MODELS_CACHE: list[str] | None = None
+_MODELS_CACHE_TS: float = 0.0
+_MODELS_CACHE_TTL = 300.0  # 5분
def _fetchRemoteModels(token: str) -> list[str] | None:
    """Fetch the available model ids from the remote ``/codex/models`` endpoint.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The list of model ids, or ``None`` when the request fails, the status
        is not 200, the body is not valid JSON, the payload has an unexpected
        shape, or no model id is found.
    """
    url = f"{CODEX_API_BASE}/codex/models"
    headers = {
        "Authorization": f"Bearer {token}",
        "originator": "codex_cli_rs",
    }
    accountId = oauthToken.get_account_id()
    if accountId:
        headers["chatgpt-account-id"] = accountId
    try:
        resp = requests.get(url, headers=headers, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (requests.RequestException, json.JSONDecodeError, ValueError):
        return None

    # Accept either a bare list or an object wrapping it under "models"/"data".
    if isinstance(data, list):
        items = data
    elif isinstance(data, dict):
        items = data.get("models", data.get("data", []))
    else:
        return None
    # A null or scalar value here would otherwise raise while iterating and
    # escape the caller's fallback path.
    if not isinstance(items, (list, dict)):
        return None

    models = []
    for item in items:
        if item is None:
            continue
        if isinstance(item, dict):
            modelId = item.get("id") or item.get("model")
        else:
            modelId = str(item)
        if modelId:
            models.append(modelId)
    return models if models else None
+def availableModels() -> list[str]:  # returns the module-level cached list when it is still fresh
+    """사용 가능한 모델 목록 — 원격 조회 + 캐시 + 번들 fallback."""  # i.e. "available models: remote lookup + cache + bundled fallback"
+    import time
+    global _MODELS_CACHE, _MODELS_CACHE_TS
+    now = time.time()
+    if _MODELS_CACHE and (now - _MODELS_CACHE_TS) < _MODELS_CACHE_TTL:  # non-empty cache younger than the TTL
+        return _MODELS_CACHE
+    try:
+        token = oauthToken.get_valid_token()
+    except (TokenRefreshError, OSError):  # no usable token: skip the remote lookup
+        token = None
+    if token:
+        remote = _fetchRemoteModels(token)
+        if remote:
+            _MODELS_CACHE = remote
+            _MODELS_CACHE_TS = now
+            return remote
+    _MODELS_CACHE = list(_BUNDLED_MODELS)  # fallback is a copy of the bundled list and is cached with the same timestamp
+    _MODELS_CACHE_TS = now
+    return _MODELS_CACHE
 class ChatGPTOAuthError(Exception):
     """ChatGPT OAuth provider 에러 — action 필드로 사용자 대응 안내."""

src/dartlab/ai/runtime/agent.py CHANGED Viewed

@@ -10,6 +10,7 @@ import json
 from typing import Any, Callable, Generator
 from dartlab.ai.providers.base import BaseProvider
 from dartlab.ai.tools.registry import (
     build_tool_runtime,
 )
@@ -51,6 +52,7 @@ def agent_loop(
     """
     tool_runtime = runtime or build_tool_runtime(company, name="agent-loop")
     tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
     last_answer = ""
@@ -66,10 +68,17 @@ def agent_loop(
         # 도구 실행 + 결과 추가
         for tc in response.tool_calls:
             if on_tool_call:
                 on_tool_call(tc.name, tc.arguments)
             result = tool_runtime.execute_tool(tc.name, tc.arguments)
             if on_tool_result:
                 on_tool_result(tc.name, result)
@@ -92,6 +101,21 @@ _REFLECTION_PROMPT = (
 )
 def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str:
     """답변 자체 검증 — 1회 reflection으로 품질 보완."""
     reflect_messages = [
@@ -123,6 +147,7 @@ def agent_loop_stream(
     """
     tool_runtime = runtime or build_tool_runtime(company, name="agent-stream")
     tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
     # 대화형 질문은 첫 턴 도구 강제 안 함
     _isConversation = question_type in ("대화", "메타")
@@ -157,10 +182,17 @@ def agent_loop_stream(
         messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
         for tc in response.tool_calls:
             if on_tool_call:
                 on_tool_call(tc.name, tc.arguments)
             result = tool_runtime.execute_tool(tc.name, tc.arguments)
             if on_tool_result:
                 on_tool_result(tc.name, result)
@@ -259,7 +291,7 @@ def agent_loop_planning(
     steps = plan.get("steps", [])[:max_steps]
     # 2단계: 계획 순차 실행
-    results: list[dict[str, str]] = []
     for step in steps:
         tool_name = step.get("tool", "")
         args = step.get("args", {})
@@ -268,17 +300,13 @@ def agent_loop_planning(
             on_tool_call(tool_name, args)
         result = tool_runtime.execute_tool(tool_name, args)
         if on_tool_result:
             on_tool_result(tool_name, result)
-        results.append({"tool": tool_name, "result": result[:3000]})
     # 3단계: 종합 답변 생성
-    synthesis_parts = [f"질문: {question}", "", "## 수집된 데이터:"]
-    for r in results:
-        synthesis_parts.append(f"\n### {r['tool']}")
-        synthesis_parts.append(r["result"])
     synthesis_parts.append("\n## 지시사항:")
     synthesis_parts.append(
         "위 데이터를 종합하여 사용자 질문에 대한 구조화된 답변을 작성하세요. "
@@ -291,3 +319,92 @@ def agent_loop_planning(
     ]
     final_resp = provider.complete(synth_messages)
     return final_resp.answer

 from typing import Any, Callable, Generator
 from dartlab.ai.providers.base import BaseProvider
+from dartlab.ai.runtime.scratchpad import Scratchpad
 from dartlab.ai.tools.registry import (
     build_tool_runtime,
 )
     """
     tool_runtime = runtime or build_tool_runtime(company, name="agent-loop")
     tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
+    pad = Scratchpad()
     last_answer = ""
         # 도구 실행 + 결과 추가
         for tc in response.tool_calls:
+            # 중복 호출 방지
+            warning = pad.getDuplicateWarning(tc.name)
+            if warning:
+                messages.append(provider.format_tool_result(tc.id, warning))
+                continue
             if on_tool_call:
                 on_tool_call(tc.name, tc.arguments)
             result = tool_runtime.execute_tool(tc.name, tc.arguments)
+            pad.addEntry(tc.name, tc.arguments, result)
             if on_tool_result:
                 on_tool_result(tc.name, result)
 )
def _buildReflectionPrompt(questionType: str | None = None) -> str:
    """Return the reflection prompt, extended with skill checkpoints if any.

    Looks up a skill by ``questionType`` only (the question text passed to
    ``matchSkill`` is empty). When the matched skill has checkpoints they are
    appended as a bullet list; otherwise, or on any error, the base
    ``_REFLECTION_PROMPT`` is returned unchanged.
    """
    prompt = _REFLECTION_PROMPT
    try:
        from dartlab.ai.skills.registry import matchSkill

        skill = matchSkill("", questionType=questionType)
        if skill and skill.checkpoints:
            bullets = [f"- {c}" for c in skill.checkpoints]
            prompt += f"\n\n**추가 검증 기준 ({skill.name}):**\n" + "\n".join(bullets)
    except Exception:
        # Best-effort enrichment: any failure falls back to the base prompt.
        pass
    return prompt
 def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str:
     """답변 자체 검증 — 1회 reflection으로 품질 보완."""
     reflect_messages = [
     """
     tool_runtime = runtime or build_tool_runtime(company, name="agent-stream")
     tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
+    pad = Scratchpad()
     # 대화형 질문은 첫 턴 도구 강제 안 함
     _isConversation = question_type in ("대화", "메타")
         messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
         for tc in response.tool_calls:
+            # 중복 호출 방지
+            warning = pad.getDuplicateWarning(tc.name)
+            if warning:
+                messages.append(provider.format_tool_result(tc.id, warning))
+                continue
             if on_tool_call:
                 on_tool_call(tc.name, tc.arguments)
             result = tool_runtime.execute_tool(tc.name, tc.arguments)
+            pad.addEntry(tc.name, tc.arguments, result)
             if on_tool_result:
                 on_tool_result(tc.name, result)
     steps = plan.get("steps", [])[:max_steps]
     # 2단계: 계획 순차 실행
+    pad = Scratchpad()
     for step in steps:
         tool_name = step.get("tool", "")
         args = step.get("args", {})
             on_tool_call(tool_name, args)
         result = tool_runtime.execute_tool(tool_name, args)
+        pad.addEntry(tool_name, args, result)
         if on_tool_result:
             on_tool_result(tool_name, result)
     # 3단계: 종합 답변 생성
+    synthesis_parts = [f"질문: {question}", "", "## 수집된 데이터:", pad.toContext()]
     synthesis_parts.append("\n## 지시사항:")
     synthesis_parts.append(
         "위 데이터를 종합하여 사용자 질문에 대한 구조화된 답변을 작성하세요. "
     ]
     final_resp = provider.complete(synth_messages)
     return final_resp.answer
+# ══════════════════════════════════════
+# 자율 탐색 에이전트 (Tier 2 — 완전 분석)
+# ══════════════════════════════════════
+_SUFFICIENCY_HINT = (
+    "\n\n---\n"
+    "**안내**: 충분한 데이터를 수집했다면 도구를 더 호출하지 말고 최종 답변을 작성하세요. "
+    "아직 부족하면 추가 도구를 호출하세요."
+)
def agentLoopAutonomous(
    provider: BaseProvider,
    messages: list[dict],
    company: Any,
    *,
    maxTurns: int = 15,
    maxTools: int | None = None,
    runtime: ToolRuntime | None = None,
    onToolCall: Callable[[str, dict], None] | None = None,
    onToolResult: Callable[[str, str], None] | None = None,
    questionType: str | None = None,
    forceToolFirstTurn: bool = True,
) -> Generator[str, None, None]:
    """Autonomous agent loop: keep calling tools until the LLM stops asking.

    Each turn asks the provider for a tool-enabled completion. A turn without
    tool calls ends the loop and yields the answer text. A turn with tool
    calls executes them, records each result in a ``Scratchpad`` and appends
    it to ``messages`` (which is mutated in place). If ``maxTurns`` is
    exhausted, a final synthesis request is appended and streamed.

    Args:
        provider: LLM provider used for completions and streaming.
        messages: Conversation so far; extended in place.
        company: Passed to the tool runtime; ``None`` disables the forced
            first-turn tool call.
        maxTurns: Maximum number of tool-calling turns.
        maxTools: Forwarded to ``selectTools``.
        runtime: Existing tool runtime; built from ``company`` when omitted.
        onToolCall: Called with (name, arguments) before a tool executes.
        onToolResult: Called with (name, result) after a tool executes.
        questionType: Forwarded to ``selectTools``; the values "대화" and
            "메타" also disable the forced first-turn tool call.
        forceToolFirstTurn: Request ``tool_choice="any"`` on the first turn.

    Yields:
        Chunks of the final answer text.
    """
    toolRuntime = runtime or build_tool_runtime(company, name="agent-autonomous")
    tools = selectTools(toolRuntime, questionType=questionType, maxTools=maxTools, hasCompany=company is not None)
    pad = Scratchpad(tokenBudget=12000)
    # Conversation/meta questions and company-less runs never force a tool call.
    mustUseToolFirst = forceToolFirstTurn and questionType not in ("대화", "메타") and company is not None

    for turn in range(maxTurns):
        extra: dict = {"tool_choice": "any"} if (turn == 0 and mustUseToolFirst) else {}
        try:
            response = provider.complete_with_tools(messages, tools, **extra)
        except TypeError:
            # Retry without extra keyword arguments.
            response = provider.complete_with_tools(messages, tools)

        if not response.tool_calls:
            # After the first turn a non-blank answer is yielded directly;
            # in every other case the answer is streamed from the provider.
            if turn > 0 and response.answer and response.answer.strip():
                yield response.answer
            else:
                yield from provider.stream(messages)
            return

        messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
        for call in response.tool_calls:
            duplicateNotice = pad.getDuplicateWarning(call.name)
            if duplicateNotice:
                # Answer the call with the warning instead of executing it.
                messages.append(provider.format_tool_result(call.id, duplicateNotice))
                continue
            if onToolCall:
                onToolCall(call.name, call.arguments)
            output = toolRuntime.execute_tool(call.name, call.arguments)
            pad.addEntry(call.name, call.arguments, output)
            if onToolResult:
                onToolResult(call.name, output)
            messages.append(provider.format_tool_result(call.id, output))

        # From the third turn on, add the usage summary plus a sufficiency
        # hint as a user message.
        if turn >= 2:
            messages.append({"role": "user", "content": pad.getUsageSummary() + _SUFFICIENCY_HINT})

    # maxTurns reached: request the final synthesis.
    closingRequest = (
        f"도구 호출이 최대 {maxTurns}턴에 도달했습니다. "
        "지금까지 수집한 데이터를 기반으로 최종 종합 답변을 작성하세요.\n\n"
        f"{pad.getUsageSummary()}"
    )
    messages.append({"role": "user", "content": closingRequest})
    yield from provider.stream(messages)

src/dartlab/ai/runtime/core.py CHANGED Viewed

@@ -19,6 +19,7 @@ dartlab.ask(), server UI, CLI가 모두 이 코어를 소비한다.
 from __future__ import annotations
 from typing import Any, Generator
 from dartlab.ai.runtime.events import AnalysisEvent
@@ -30,6 +31,7 @@ from dartlab.ai.runtime.post_processing import (
 )
 from dartlab.ai.runtime.run_modes import (
     _run_agent,
     _run_light_mode,
     _run_stream,
 )
@@ -99,6 +101,7 @@ def _build_included_evidence(included_tables: list[str]) -> list[dict[str, str]]
         "BS_quarterly": "분기별 재무상태표",
         "_dart_openapi_filings": "최근 공시 목록",
         "_diff": "공시 변화 비교",
         "_response_contract": "응답 계약",
         "_clarify": "확인 질문",
     }
@@ -147,6 +150,7 @@ def _context_label(module_name: str, explicit_label: str | None = None) -> str |
                 "segments": "사업부문 데이터",
                 "_dart_openapi_filings": "최근 공시 목록",
                 "_diff": "공시 변화 비교",
             }.items()
             if normalized == key or module_name == key
         ),
@@ -828,6 +832,17 @@ def _analyze_inner(
         dataReadyBlock = f"데이터 가용성\n{dataReadySummary}"
         dynamic_part = f"{dynamic_part}\n\n{dataReadyBlock}" if dynamic_part else dataReadyBlock
     if dialogue_policy:
         dynamic_part = dynamic_part + "\n\n" + dialogue_policy if dynamic_part else dialogue_policy
@@ -885,12 +900,17 @@ def _analyze_inner(
         # 모든 provider에서 Super Tool 모드 기본 활성화 — 8개 도구로 통합
         _useSuperTools = True
         effective_turns = max(max_turns, _estimate_max_turns(question, q_type or ""))
-        for _ev in _run_agent(
             llm,
             messages,
             company,
             question,
-            max_turns=effective_turns,
             max_tools=max_tools,
             q_type=q_type,
             useSuperTools=_useSuperTools,
@@ -932,6 +952,24 @@ def _analyze_inner(
         if response_meta.get("grade") or response_meta.get("has_conclusion"):
             _done_payload["responseMeta"] = response_meta
     # ── 15. Meta 업데이트 (includedModules, yearRange) ──
     if _included_tables:
         includedEvidence = _build_included_evidence(_included_tables)

 from __future__ import annotations
+import sqlite3
 from typing import Any, Generator
 from dartlab.ai.runtime.events import AnalysisEvent
 )
 from dartlab.ai.runtime.run_modes import (
     _run_agent,
+    _run_agent_autonomous,
     _run_light_mode,
     _run_stream,
 )
         "BS_quarterly": "분기별 재무상태표",
         "_dart_openapi_filings": "최근 공시 목록",
         "_diff": "공시 변화 비교",
+        "_changes": "공시 변화 요약",
         "_response_contract": "응답 계약",
         "_clarify": "확인 질문",
     }
                 "segments": "사업부문 데이터",
                 "_dart_openapi_filings": "최근 공시 목록",
                 "_diff": "공시 변화 비교",
+                "_changes": "공시 변화 요약",
             }.items()
             if normalized == key or module_name == key
         ),
         dataReadyBlock = f"데이터 가용성\n{dataReadySummary}"
         dynamic_part = f"{dynamic_part}\n\n{dataReadyBlock}" if dynamic_part else dataReadyBlock
+    # 이전 분석 기록 주입 (세션 간 메모리)
+    if stock_id:
+        try:
+            from dartlab.ai.memory.store import getMemory
+            memoryContext = getMemory().toPromptContext(stock_id)
+            if memoryContext:
+                dynamic_part = f"{dynamic_part}\n\n{memoryContext}" if dynamic_part else memoryContext
+        except (ImportError, OSError, sqlite3.Error):
+            pass
     if dialogue_policy:
         dynamic_part = dynamic_part + "\n\n" + dialogue_policy if dynamic_part else dialogue_policy
         # 모든 provider에서 Super Tool 모드 기본 활성화 — 8개 도구로 통합
         _useSuperTools = True
         effective_turns = max(max_turns, _estimate_max_turns(question, q_type or ""))
+        # report_mode → 자율 탐색 에이전트 (Tier 2)
+        _agent_fn = _run_agent_autonomous if report_mode else _run_agent
+        _effective_max = max(effective_turns, 15) if report_mode else effective_turns
+        for _ev in _agent_fn(
             llm,
             messages,
             company,
             question,
+            max_turns=_effective_max,
             max_tools=max_tools,
             q_type=q_type,
             useSuperTools=_useSuperTools,
         if response_meta.get("grade") or response_meta.get("has_conclusion"):
             _done_payload["responseMeta"] = response_meta
+    # ── 14.5. 분석 메모리 저장 ──
+    if stock_id and _full_response_parts:
+        try:
+            from dartlab.ai.memory.store import getMemory
+            from dartlab.ai.memory.summarizer import extractGrade, summarizeResponse
+            _fullText = "".join(_full_response_parts)
+            _mem = getMemory()
+            _mem.saveAnalysis(
+                stockCode=stock_id,
+                question=question[:200],
+                questionType=q_type or "",
+                resultSummary=summarizeResponse(_fullText),
+                grade=extractGrade(_fullText),
+            )
+        except (ImportError, OSError, sqlite3.Error):
+            pass
     # ── 15. Meta 업데이트 (includedModules, yearRange) ──
     if _included_tables:
         includedEvidence = _build_included_evidence(_included_tables)

src/dartlab/ai/runtime/run_modes.py CHANGED Viewed

@@ -1,6 +1,6 @@
-"""AI 분석 실행 모드 — light / guided_json / stream / agent.
-core.py의 _analyze_inner()에서 디스패치하는 4가지 실행 경로.
 """
 from __future__ import annotations
@@ -249,3 +249,88 @@ def _run_agent(
         yield AnalysisEvent("chart", chart_events.pop(0))
     while ui_events:
         yield ui_events.pop(0)

+"""AI 분석 실행 모드 — light / guided_json / stream / agent / autonomous.
+core.py의 _analyze_inner()에서 디스패치하는 5가지 실행 경로.
 """
 from __future__ import annotations
         yield AnalysisEvent("chart", chart_events.pop(0))
     while ui_events:
         yield ui_events.pop(0)
+# ── Autonomous agent mode (Tier 2) ──────────────────────
def _run_agent_autonomous(
    llm,
    messages: list[dict],
    company: Any,
    question: str,
    *,
    max_turns: int = 15,
    max_tools: int | None = None,
    q_type: str | None = None,
    useSuperTools: bool = True,
    _full_response_parts: list[str],
) -> Generator[AnalysisEvent, None, None]:
    """Run the autonomous agent and translate its output into events.

    Builds a tool runtime, appends the agent system addition to the first
    message (mutating ``messages``), then drives ``agentLoopAutonomous``.
    Tool calls, tool results, charts and UI actions recorded by the callbacks
    are queued and flushed just before each text chunk and once more at the
    end.

    Args:
        llm: Provider handed to ``agentLoopAutonomous``.
        messages: Conversation; ``messages[0]["content"]`` is extended.
        company: Forwarded to the tool runtime and the agent loop.
        question: Not used by this function; kept so the signature matches
            the call site.
        max_turns: Forwarded as ``maxTurns``.
        max_tools: Forwarded as ``maxTools``.
        q_type: Forwarded as ``questionType``.
        useSuperTools: Forwarded to ``build_tool_runtime``.
        _full_response_parts: Receives every answer chunk, in order.

    Yields:
        ``AnalysisEvent`` objects of kind "tool_call", "tool_result",
        "chart", UI action, and "chunk".
    """
    from dartlab.ai.runtime.agent import agentLoopAutonomous, build_agent_system_addition
    from dartlab.ai.tools.registry import build_tool_runtime

    runtime = build_tool_runtime(company, name="core-autonomous", useSuperTools=useSuperTools)
    system_addition = build_agent_system_addition(runtime)
    messages[0]["content"] += system_addition

    tool_calls_log: list[dict] = []
    tool_results_log: list[dict] = []
    chart_events: list[dict] = []
    ui_events: list[AnalysisEvent] = []

    def _on_tool_call(name: str, arguments: dict) -> None:
        tool_calls_log.append({"name": name, "arguments": arguments})

    def _on_tool_result(name: str, result: str) -> None:
        tool_results_log.append({"name": name, "result": result})
        # Parse once. Only JSON objects can carry charts or a UI action, so
        # anything else (plain text, JSON lists/scalars) is ignored instead
        # of raising AttributeError on ``.get``.
        try:
            parsed = json.loads(result)
        except (json.JSONDecodeError, TypeError):
            return
        if not isinstance(parsed, dict):
            return
        if name == "chart":
            charts = parsed.get("charts")
            if charts:
                chart_events.append({"charts": charts})
        if parsed.get("action"):
            ui_events.append(AnalysisEvent(EventKind.UI_ACTION, parsed))

    def _drain_pending() -> Generator[AnalysisEvent, None, None]:
        # Flush queued events in a fixed order: calls, results, charts, UI.
        while tool_calls_log:
            yield AnalysisEvent("tool_call", tool_calls_log.pop(0))
        while tool_results_log:
            yield AnalysisEvent("tool_result", tool_results_log.pop(0))
        while chart_events:
            yield AnalysisEvent("chart", chart_events.pop(0))
        while ui_events:
            yield ui_events.pop(0)

    for chunk in agentLoopAutonomous(
        llm,
        messages,
        company,
        maxTurns=max_turns,
        maxTools=max_tools,
        runtime=runtime,
        onToolCall=_on_tool_call,
        onToolResult=_on_tool_result,
        questionType=q_type,
    ):
        yield from _drain_pending()
        _full_response_parts.append(chunk)
        yield AnalysisEvent("chunk", {"text": chunk})

    # Events recorded after the last chunk still need to be delivered.
    yield from _drain_pending()

src/dartlab/ai/runtime/scratchpad.py ADDED Viewed

	@@ -0,0 +1,115 @@

+"""도구 결과 누적/정리 엔진 — dexter scratchpad 패턴.
+에이전트 루프에서 도구 호출 결과를 구조적으로 관리한다:
+- 도구별 호출 횟수 추적 + 중복 방지
+- 토큰 예산 초과 시 오래된 결과 압축
+- LLM에 전달할 정리된 컨텍스트 생성
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+@dataclass
+class _Entry:
+    """단일 도구 호출 결과."""
+    toolName: str
+    args: dict[str, Any]
+    result: str
+    tokenEstimate: int
+    order: int
+@dataclass
+class Scratchpad:
+    """에이전트 루프 도구 결과 누적/정리."""
+    entries: list[_Entry] = field(default_factory=list)
+    callCounts: dict[str, int] = field(default_factory=dict)
+    _order: int = field(default=0, repr=False)
+    tokenBudget: int = 8000
+    # ── 핵심 API ──────────────────────────────────────
+    def addEntry(self, toolName: str, args: dict[str, Any], result: str) -> None:
+        """도구 결과 추가 (pruning 자동 적용)."""
+        from dartlab.ai.context.pruning import pruneToolResult
+        pruned = pruneToolResult(toolName, result)
+        tokens = _estimateTokens(pruned)
+        self._order += 1
+        self.entries.append(_Entry(toolName, args, pruned, tokens, self._order))
+        self.callCounts[toolName] = self.callCounts.get(toolName, 0) + 1
+        self.pruneIfNeeded()
+    def isDuplicateExceeded(self, toolName: str, maxCalls: int = 3) -> bool:
+        """같은 도구가 maxCalls 이상 호출됐는지."""
+        return self.callCounts.get(toolName, 0) >= maxCalls
+    def pruneIfNeeded(self) -> None:
+        """토큰 예산 초과 시 오래된 결과를 1줄 요약으로 압축."""
+        while self._totalTokens() > self.tokenBudget and len(self.entries) > 1:
+            oldest = self.entries[0]
+            summary = _summarizeLine(oldest.toolName, oldest.result)
+            oldest.result = summary
+            oldest.tokenEstimate = _estimateTokens(summary)
+            # 요약해도 여전히 초과면 제거
+            if self._totalTokens() > self.tokenBudget:
+                self.entries.pop(0)
+    def toContext(self) -> str:
+        """누적 결과를 마크다운으로 변환."""
+        if not self.entries:
+            return ""
+        parts: list[str] = []
+        for e in self.entries:
+            argsStr = ", ".join(f"{k}={v}" for k, v in e.args.items()) if e.args else ""
+            parts.append(f"### {e.toolName}({argsStr})\n{e.result}")
+        return "\n\n".join(parts)
+    def getUsageSummary(self) -> str:
+        """현재 도구 호출 현황 텍스트."""
+        if not self.callCounts:
+            return ""
+        lines = [f"- {name}: {count}회" for name, count in self.callCounts.items()]
+        total = self._totalTokens()
+        lines.append(f"- 컨텍스트: ~{total} 토큰 / {self.tokenBudget} 예산")
+        return "**도구 사용 현황:**\n" + "\n".join(lines)
+    def getDuplicateWarning(self, toolName: str) -> str | None:
+        """중복 초과 시 LLM에 전달할 경고 메시지."""
+        if not self.isDuplicateExceeded(toolName):
+            return None
+        count = self.callCounts.get(toolName, 0)
+        return (
+            f"⚠️ {toolName}을 이미 {count}회 호출했습니다. "
+            f"같은 도구를 반복 호출하지 말고, 수집된 데이터로 답변을 종합하세요."
+        )
+    # ── 내부 ──────────────────────────────────────────
+    def _totalTokens(self) -> int:
+        return sum(e.tokenEstimate for e in self.entries)
def _estimateTokens(text: str) -> int:
    """Roughly estimate the token count of ``text``.

    Hangul syllables (U+AC00..U+D7A3) count as two characters per token and
    every other character as four per token, plus one. Empty text is 0.
    """
    if not text:
        return 0
    hangul = 0
    for ch in text:
        if "\uac00" <= ch <= "\ud7a3":
            hangul += 1
    rest = len(text) - hangul
    return 1 + hangul // 2 + rest // 4
def _summarizeLine(toolName: str, result: str) -> str:
    """Compress a tool result into a one-line summary.

    Keeps the first 100 characters of the first line of the stripped result.
    Multi-line results also get the line count and the tool name appended.
    """
    text = result.strip()
    head = text.partition("\n")[0][:100]
    lineCount = text.count("\n") + 1
    if lineCount > 1:
        return f"[요약] {head}... ({lineCount}줄, {toolName})"
    return f"[요약] {head}"

src/dartlab/ai/skills/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""분석 스킬 — 프롬프트 기반 워크플로우 가이드.
+도구를 지정하지 않고 분석 목표만 선언한다.
+LLM이 현재 가용한 도구 중에서 자율 선택.
+"""
+from dartlab.ai.skills.registry import Skill, matchSkill
+__all__ = ["Skill", "matchSkill"]

src/dartlab/ai/skills/catalog.py ADDED Viewed

	@@ -0,0 +1,145 @@

+"""분석 영역별 스킬 카탈로그.
+도구를 지정하지 않는다 — 분석 목표만 선언.
+8대 영역이 안정화되면서 자연스럽게 효과가 향상된다.
+"""
+from __future__ import annotations
+from dartlab.ai.skills.registry import Skill
+SKILLS: tuple[Skill, ...] = (
+    Skill(
+        id="profitability",
+        name="수익성 심층 분석",
+        triggerKeywords=("수익성", "이익률", "마진", "ROE", "ROA", "영업이익률"),
+        analysisGoals=(
+            "ROE를 DuPont 분해하여 수익성/효율성/레버리지 동인 식별",
+            "영업이익률과 원가율 추세에서 비용 구조 변화 파악",
+            "영업CF/순이익 비율로 이익의 질 판단",
+            "부문별 수익성 차이가 있으면 세그먼트 분해",
+        ),
+        synthesisGuide="DuPont 분해 → 이익의 질 → 비용 동인 → 인과 관계 서술",
+        checkpoints=(
+            "DuPont 3요소 분해가 있는가?",
+            "CF/NI 비율을 인용했는가?",
+            "비용 구조 변화의 원인을 설명했는가?",
+        ),
+    ),
+    Skill(
+        id="health",
+        name="재무 건전성 분석",
+        triggerKeywords=("건전성", "부채", "유동성", "안정성", "재무구조", "부채비율"),
+        analysisGoals=(
+            "부채비율과 유동비율 추세로 구조적 안정성 판단",
+            "이자보상배율과 차입금 만기 구조 확인",
+            "운전자본 사이클(매출채권+재고-매입채무) 추이 분석",
+            "현금성 자산 대비 단기 의무 커버리지 확인",
+        ),
+        synthesisGuide="레버리지 구조 → 유동성 계층 → 부채 만기 → 종합 건전성 판단",
+        checkpoints=(
+            "유동비율과 부채비율 수치를 인용했는가?",
+            "이자보상배율을 확인했는가?",
+            "단기 유동성 위험을 평가했는가?",
+        ),
+    ),
+    Skill(
+        id="valuation",
+        name="밸류에이션 분석",
+        triggerKeywords=("밸류에이션", "적정가치", "목표가", "저평가", "고평가", "PER", "PBR", "DCF"),
+        analysisGoals=(
+            "핵심 멀티플(PER, PBR, EV/EBITDA) 산출 및 업종 비교",
+            "이익 성장률과 지속가능성을 근거로 적정 멀티플 범위 추정",
+            "가능하면 DCF 관점에서 내재가치 범위 제시",
+            "안전마진(현재가 vs 적정가치 범위) 판단",
+        ),
+        synthesisGuide="멀티플 비교 → 성장률 근거 → 적정가치 범위 → 안전마진 판단",
+        checkpoints=(
+            "PER/PBR 수치와 업종 비교가 있는가?",
+            "성장률 근거를 제시했는가?",
+            "적정가치 범위를 제시했는가? (단일 목표가 아닌 범위)",
+        ),
+    ),
+    Skill(
+        id="risk",
+        name="리스크 분석",
+        triggerKeywords=("리스크", "위험", "위기", "불확실성", "적색신호"),
+        analysisGoals=(
+            "재무 리스크: 유동성, 레버리지, 이자보상 역량",
+            "사업 리스크: 매출처 집중, 공급망 의존, 규제 변화",
+            "회계 리스크: 감사의견 변화, 특수관계자 거래, 회계정책 변경",
+            "공시에서 경영진이 직접 언급한 리스크 요인 확인",
+        ),
+        synthesisGuide="재무 리스크 → 사업 리스크 → 회계 리스크 → 종합 위험도 판단",
+        checkpoints=(
+            "적색 신호 체크리스트를 적용했는가?",
+            "공시 원문에서 리스크 관련 서술을 인용했는가?",
+        ),
+    ),
+    Skill(
+        id="strategy",
+        name="사업 전략 분석",
+        triggerKeywords=("사업", "전략", "경쟁우위", "비즈니스모델", "사업구조", "사업개요"),
+        analysisGoals=(
+            "사업 구조: 부문별 매출 비중과 수익성 차이",
+            "경쟁 우위: R&D 투자 강도, 마진 프리미엄, 고객 집중도",
+            "성장 전략: 유기적 성장 vs 인수, CAPEX 방향",
+            "공시 원문에서 경영진의 전략 서술 확인",
+        ),
+        synthesisGuide="사업 구조 분해 → 경쟁 우위 식별 → 성장 전략 평가 → 지속가능성 판단",
+        checkpoints=(
+            "부문별 매출/이익 비중을 분해했는가?",
+            "R&D/CAPEX 투자 방향을 확인했는가?",
+        ),
+    ),
+    Skill(
+        id="accounting",
+        name="회계 품질 분석",
+        triggerKeywords=("회계", "감사", "분식", "이익의질", "발생주의", "회계정책"),
+        analysisGoals=(
+            "Accrual Ratio 계산: (순이익-영업CF)/평균자산 — 10% 초과 시 의심",
+            "감사의견 변화와 감사인 교체 이력 확인",
+            "회계정책 변경(수익인식, 자본화, 감가상각) 영향 파악",
+            "매출채권/재고 증가율과 매출/원가 증가율 비교",
+        ),
+        synthesisGuide="Accrual Ratio → 감사 이력 → 회계정책 변경 → 이익의 질 종합",
+        checkpoints=(
+            "CF/NI 비율 또는 Accrual Ratio를 계산했는가?",
+            "감사의견을 확인했는가?",
+        ),
+    ),
+    Skill(
+        id="dividend",
+        name="배당 분석",
+        triggerKeywords=("배당", "배당금", "배당률", "배당성향", "주주환원"),
+        analysisGoals=(
+            "배당 추이: 배당금, 배당수익률, 배당성향 3~5년 시계열",
+            "배당 지속가능성: FCF 대비 배당금, 이익 안정성",
+            "주주환원 정책: 자사주 매입, 소각 이력 확인",
+            "동종업종 배당 수준 비교 (가능 시)",
+        ),
+        synthesisGuide="배당 추이 → 지속가능성(FCF) → 주주환원 정책 → 매력도 판단",
+        checkpoints=(
+            "배당성향과 배당수익률 수치를 인용했는가?",
+            "FCF 대비 배당 커버리지를 확인했는가?",
+        ),
+    ),
+    Skill(
+        id="comprehensive",
+        name="종합 분석",
+        triggerKeywords=("종합", "전반", "전체", "총평", "분석해줘", "어때"),
+        analysisGoals=(
+            "사업 구조와 경쟁 포지셔닝 파악",
+            "핵심 재무 지표(수익성, 건전성, 성장성) 3~5년 추세",
+            "이익의 질과 현금흐름 프로파일",
+            "적색 신호 체크 및 리스크 요인 식별",
+            "강점/약점 정리와 Bull/Bear 논거",
+        ),
+        synthesisGuide="사업 구조 → 재무 추세 → 이익의 질 → 리스크 → 강점/약점 → 종합 판단",
+        checkpoints=(
+            "최소 3개 이상의 재무 비율을 인용했는가?",
+            "강점과 약점을 균형 있게 제시했는가?",
+            "Bull/Bear 논거를 제시했는가?",
+        ),
+    ),
+)

src/dartlab/ai/skills/registry.py ADDED Viewed

	@@ -0,0 +1,58 @@

+"""스킬 레지스트리 — 분석 목표 기반 워크플로우 매칭.
+Skill은 도구를 지정하지 않는다.
+분석 목표(analysisGoals)와 종합 가이드(synthesisGuide)만 선언하고,
+LLM이 현재 가용한 도구 중에서 자율 선택한다.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
@dataclass(frozen=True)
class Skill:
    """Definition of an analysis workflow: goals and synthesis guide, no tools."""

    id: str
    name: str
    triggerKeywords: tuple[str, ...]
    analysisGoals: tuple[str, ...]
    synthesisGuide: str
    checkpoints: tuple[str, ...] = field(default_factory=tuple)

    def toPrompt(self) -> str:
        """Render the skill as a natural-language guide for the system prompt."""
        numberedGoals = "\n".join(
            f"  {n}. {goal}" for n, goal in enumerate(self.analysisGoals, start=1)
        )
        sections = [
            f"## 분석 스킬: {self.name}",
            "",
            "**분석 목표:**",
            numberedGoals,
            "",
            f"**종합 프레임:** {self.synthesisGuide}",
        ]
        # The self-check section is only emitted when checkpoints exist.
        if self.checkpoints:
            sections.append("**자체 검증:**")
            sections.append("\n".join(f"  - {c}" for c in self.checkpoints))
        return "\n".join(sections)
def matchSkill(
    question: str,
    questionType: str | None = None,
) -> Skill | None:
    """Pick the skill that best fits the question.

    A ``questionType`` that appears among a skill's trigger keywords wins
    outright (first such skill in catalog order). Otherwise the skill with
    the most trigger keywords contained in ``question`` is chosen, with ties
    going to the earlier catalog entry. Returns ``None`` when nothing matches.
    """
    from dartlab.ai.skills.catalog import SKILLS

    # Pass 1: direct match on the question type.
    if questionType:
        direct = next((s for s in SKILLS if questionType in s.triggerKeywords), None)
        if direct is not None:
            return direct

    # Pass 2: keyword hits in the question text.
    if not question:
        return None

    def hitCount(skill: Skill) -> int:
        return sum(1 for kw in skill.triggerKeywords if kw in question)

    # max() returns the first maximal element, matching first-wins tie-breaking.
    top = max(SKILLS, key=hitCount, default=None)
    if top is None or hitCount(top) == 0:
        return None
    return top

src/dartlab/ai/tools/defaults/helpers.py CHANGED Viewed

@@ -21,8 +21,11 @@ def df_to_md(df: pl.DataFrame, max_rows: int = 15, max_chars: int = 0, market: s
 def json_to_text(value: Any, max_chars: int = 4000) -> str:
-    """dict/list/json 직렬화."""
-    text = json.dumps(value, ensure_ascii=False, indent=2, default=str)
     if len(text) <= max_chars:
         return text
     return text[:max_chars] + "\n... (truncated)"

 def json_to_text(value: Any, max_chars: int = 4000) -> str:
+    """Serialize a dict/list/JSON-like value to indented JSON text after pruning.
+    The value is passed through ``_pruneValue`` with ``_STRIP_FIELDS`` first;
+    output longer than ``max_chars`` is cut and suffixed with a truncation marker.
+    """
+    from dartlab.ai.context.pruning import _STRIP_FIELDS, _pruneValue
+    pruned = _pruneValue(value, _STRIP_FIELDS, depth=0)
+    text = json.dumps(pruned, ensure_ascii=False, indent=2, default=str)
     if len(text) <= max_chars:
         return text
     return text[:max_chars] + "\n... (truncated)"

src/dartlab/cli/commands/chat.py ADDED Viewed

	@@ -0,0 +1,472 @@

+"""`dartlab chat` command -- 인터랙티브 터미널 REPL."""
+from __future__ import annotations
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+from dartlab.cli.context import PROVIDERS
+from dartlab.cli.services.errors import CLIError
+from dartlab.cli.services.providers import detect_provider
+from dartlab.cli.services.runtime import configure_dartlab
+def configure_parser(subparsers) -> None:
+    parser = subparsers.add_parser("chat", help="대화형 AI 분석 (인터랙티브 REPL)")
+    parser.add_argument("company", nargs="?", default=None, help="종목코드 또는 회사명 (생략 가능)")
+    parser.add_argument("--provider", "-p", default=None, choices=PROVIDERS, help="LLM provider")
+    parser.add_argument("--model", "-m", default=None, help="모델명")
+    parser.add_argument("--base-url", default=None, help="커스텀 API URL")
+    parser.add_argument("--api-key", default=None, help="API 키")
+    parser.add_argument("--continue", dest="cont", action="store_true", help="이전 대화 이어가기")
+    parser.set_defaults(handler=run)
+# ---------------------------------------------------------------------------
+# State
+# ---------------------------------------------------------------------------
+@dataclass
+class _ChatState:
+    """Mutable state of one REPL session, shared by the loop and the slash commands."""
+    company: Any | None = None  # loaded company object; None while no company is selected
+    stockCode: str | None = None  # stock code copied from the loaded company
+    provider: str | None = None  # LLM provider name passed to analyze()
+    model: str | None = None  # model name; None is displayed as the default
+    baseUrl: str | None = None  # custom API URL passed to analyze()
+    apiKey: str | None = None  # API key passed to analyze()
+    sessionId: int | None = None  # persisted session id; created on first save or set on resume
+    history: list[dict[str, str]] = field(default_factory=list)  # {"role", "content"} messages
+# ---------------------------------------------------------------------------
+# Entry
+# ---------------------------------------------------------------------------
+def run(args) -> int:
+    from rich.console import Console
+    configure_dartlab()
+    console = Console()
+    provider = args.provider or detect_provider()
+    state = _ChatState(
+        provider=provider,
+        model=args.model,
+        baseUrl=args.base_url,
+        apiKey=args.api_key,
+    )
+    if args.company:
+        if not _loadCompany(state, args.company, console):
+            raise CLIError(f"종목을 찾을 수 없습니다: {args.company}")
+    if args.cont and state.stockCode:
+        _resumeSession(state, console)
+    _printWelcome(state, console)
+    _replLoop(state, console)
+    return 0
+# ---------------------------------------------------------------------------
+# REPL loop
+# ---------------------------------------------------------------------------
+# Slash commands offered to the prompt_toolkit word completer in _makePromptFn.
+_SLASH_WORDS = ["/help", "/company", "/model", "/clear", "/suggest", "/status", "/quit", "/exit", "/q"]
+def _replLoop(state: _ChatState, console) -> None:
+    promptFn = _makePromptFn()
+    while True:
+        prompt = _buildPrompt(state)
+        try:
+            userInput = promptFn(prompt)
+        except KeyboardInterrupt:
+            continue
+        except EOFError:
+            console.print("\n[dim]채팅을 종료합니다.[/]")
+            break
+        userInput = userInput.strip()
+        if not userInput:
+            continue
+        if userInput.startswith("/"):
+            shouldExit = _handleSlash(userInput, state, console)
+            if shouldExit:
+                break
+            continue
+        # 종목 없으면 텍스트에서 자동 감지 시도
+        if state.company is None:
+            _tryAutoDetect(userInput, state, console)
+        _executeQuery(userInput, state, console)
+def _makePromptFn():
+    """prompt_toolkit PromptSession을 반환. 터미널이 아니면 input() fallback."""
+    try:
+        import sys
+        if not sys.stdin.isatty():
+            return input
+        from prompt_toolkit import PromptSession
+        from prompt_toolkit.completion import WordCompleter
+        from prompt_toolkit.history import FileHistory
+        historyDir = Path.home() / ".dartlab"
+        historyDir.mkdir(parents=True, exist_ok=True)
+        historyFile = historyDir / "chat.history"
+        completer = WordCompleter(_SLASH_WORDS, sentence=True)
+        session = PromptSession(
+            history=FileHistory(str(historyFile)),
+            completer=completer,
+        )
+        return session.prompt
+    except (ImportError, RuntimeError, OSError):
+        return input
+def _buildPrompt(state: _ChatState) -> str:
+    if state.company:
+        return f"\ndartlab {state.company.corpName} > "
+    return "\ndartlab > "
+# ---------------------------------------------------------------------------
+# Query execution
+# ---------------------------------------------------------------------------
+def _executeQuery(question: str, state: _ChatState, console) -> None:
+    """Run one question through ``analyze`` and render its event stream live.
+    ``chunk`` events extend the answer buffer, ``tool_call``/``tool_result``
+    events show a status line, and an ``error`` event prints the message and
+    returns without recording anything. Ctrl+C stops the stream but keeps
+    what was received. A non-empty answer is appended to ``state.history``
+    and persisted through ``_saveMessage``.
+    """
+    from rich.live import Live
+    from rich.markdown import Markdown
+    from rich.text import Text
+    from dartlab.ai.runtime.core import analyze
+    events = analyze(
+        state.company,
+        question,
+        provider=state.provider,
+        model=state.model,
+        base_url=state.baseUrl,
+        api_key=state.apiKey,
+        use_tools=True,
+        history=state.history if state.history else None,
+    )
+    buffer = ""
+    toolStartTime: float | None = None
+    toolPanels: list[str] = []  # tool result texts collected here and rendered after the stream ends
+    queryStart = time.monotonic()
+    try:
+        with Live(console=console, refresh_per_second=8, vertical_overflow="visible") as live:
+            for ev in events:
+                if ev.kind == "chunk":
+                    buffer += ev.data["text"]
+                    live.update(Markdown(buffer))
+                elif ev.kind == "tool_call":
+                    toolName = ev.data.get("name", "")
+                    label = _toolLabel(toolName)
+                    toolStartTime = time.monotonic()
+                    live.update(Markdown(buffer + f"\n\n> {label} 조회 중..."))
+                elif ev.kind == "tool_result":
+                    toolName = ev.data.get("name", "")
+                    label = _toolLabel(toolName)
+                    elapsed = ""
+                    if toolStartTime is not None:
+                        dt = time.monotonic() - toolStartTime
+                        elapsed = f" ({dt:.1f}s)"
+                        toolStartTime = None
+                    # Collect the tool result data.
+                    resultText = ev.data.get("result", "")
+                    preview = _toolResultPreview(resultText)
+                    statusLine = f"> {label} 완료{elapsed}"
+                    # Only results that yield a preview are kept for rendering.
+                    if preview:
+                        statusLine += f" -- {preview}"
+                        toolPanels.append(resultText)
+                    live.update(Markdown(buffer + f"\n\n{statusLine}"))
+                elif ev.kind == "error":
+                    errorMsg = ev.data.get("error", "알 수 없는 오류")
+                    console.print(f"\n  [red]{errorMsg}[/]")
+                    return
+    except KeyboardInterrupt:
+        console.print("\n  [dim]응답 중단[/]")
+    # Show the collected tool result data inline (tables produced before the LLM answer).
+    if toolPanels:
+        console.print()
+        for panel in toolPanels:
+            _renderToolData(panel, console)
+    console.print()
+    # Closing summary (elapsed time).
+    totalElapsed = time.monotonic() - queryStart
+    console.print(Text(f"  {totalElapsed:.1f}s", style="dim"))
+    if buffer:
+        state.history.append({"role": "user", "content": question})
+        state.history.append({"role": "assistant", "content": buffer})
+        _saveMessage(state, "user", question)
+        _saveMessage(state, "assistant", buffer)
+def _toolResultPreview(resultText: str) -> str:
+    """tool 결과 텍스트에서 한 줄 요약을 추출한다."""
+    if not resultText or resultText.startswith("[오류]"):
+        return ""
+    lines = resultText.strip().splitlines()
+    # markdown 테이블이 있으면 행 수 표시
+    tableRows = [ln for ln in lines if ln.startswith("|") and "---" not in ln]
+    if len(tableRows) > 1:
+        return f"{len(tableRows) - 1}행"  # 헤더 제외
+    # 일반 텍스트면 첫 줄 앞부분
+    firstLine = lines[0].strip().lstrip("#").strip() if lines else ""
+    if len(firstLine) > 60:
+        firstLine = firstLine[:57] + "..."
+    return firstLine
+def _renderToolData(resultText: str, console) -> None:
+    """tool 결과를 Rich로 렌더링한다 (markdown 테이블 포함)."""
+    from rich.markdown import Markdown
+    from rich.panel import Panel
+    # markdown 테이블이 포함된 경우 패널로 감싸서 표시
+    lines = resultText.strip().splitlines()
+    hasTable = any(ln.startswith("|") for ln in lines)
+    if hasTable:
+        # 너무 길면 앞부분만 (최대 30줄)
+        if len(lines) > 30:
+            truncated = "\n".join(lines[:30]) + f"\n\n... (+{len(lines) - 30}줄)"
+        else:
+            truncated = resultText.strip()
+        console.print(Panel(Markdown(truncated), border_style="dim", padding=(0, 1)))
+_TOOL_LABELS = {
+    "explore": "공시 탐색",
+    "finance": "재무 데이터",
+    "analyze": "분석 엔진",
+    "market": "시장 데이터",
+    "openapi": "OpenDART API",
+    "system": "시스템 정보",
+    "chart": "차트 생성",
+}
+def _toolLabel(toolName: str) -> str:
+    return _TOOL_LABELS.get(toolName, toolName)
+# ---------------------------------------------------------------------------
+# Company management
+# ---------------------------------------------------------------------------
+def _loadCompany(state: _ChatState, identifier: str, console) -> bool:
+    import dartlab
+    state.company = None  # GC 유도
+    try:
+        company = dartlab.Company(identifier)
+    except (ValueError, FileNotFoundError, OSError, RuntimeError):
+        from dartlab.core.resolve import resolve_from_text
+        company, _ = resolve_from_text(identifier)
+    if company is None:
+        console.print(f"  [red]종목을 찾을 수 없습니다: {identifier}[/]")
+        return False
+    state.company = company
+    state.stockCode = company.stockCode
+    console.print(f"  [bold]{company.corpName}[/] ({company.stockCode}) 로드 완료")
+    return True
+def _tryAutoDetect(userInput: str, state: _ChatState, console) -> None:
+    from dartlab.core.resolve import resolve_from_text
+    company, _ = resolve_from_text(userInput)
+    if company is not None:
+        state.company = company
+        state.stockCode = company.stockCode
+        console.print(f"  [dim]{company.corpName} ({company.stockCode}) 자동 감지[/]")
+# ---------------------------------------------------------------------------
+# Slash commands
+# ---------------------------------------------------------------------------
+def _handleSlash(userInput: str, state: _ChatState, console) -> bool:
+    parts = userInput.split(maxsplit=1)
+    cmd = parts[0].lower()
+    arg = parts[1].strip() if len(parts) > 1 else ""
+    if cmd in ("/quit", "/exit", "/q"):
+        console.print("[dim]채팅을 종료합니다.[/]")
+        return True
+    handlers = {
+        "/help": _cmdHelp,
+        "/company": _cmdCompany,
+        "/model": _cmdModel,
+        "/clear": _cmdClear,
+        "/suggest": _cmdSuggest,
+        "/status": _cmdStatus,
+    }
+    handler = handlers.get(cmd)
+    if handler:
+        handler(arg, state, console)
+    else:
+        console.print(f"  [yellow]알 수 없는 명령: {cmd}[/]  /help 로 사용법 확인")
+    return False
+def _cmdHelp(_arg: str, _state: _ChatState, console) -> None:
+    console.print("""
+  [bold]명령어[/]
+    /help                이 도움말
+    /company <이름/코드>   종목 변경
+    /model <이름>         모델/provider 변경
+    /clear               대화 기록 초기화
+    /suggest             추천 질문
+    /status              현재 설정
+    /quit                종료
+""")
+def _cmdCompany(arg: str, state: _ChatState, console) -> None:
+    if not arg:
+        if state.company:
+            console.print(f"  현재: [bold]{state.company.corpName}[/] ({state.stockCode})")
+        else:
+            console.print("  [dim]로드된 종목이 없습니다. /company 삼성전자[/]")
+        return
+    hadCompany = state.company is not None
+    if _loadCompany(state, arg, console):
+        if hadCompany:
+            state.history.clear()
+            state.sessionId = None
+            console.print("  [dim]종목 변경으로 대화 기록이 초기화되었습니다.[/]")
+def _cmdModel(arg: str, state: _ChatState, console) -> None:
+    if not arg:
+        console.print(f"  provider: [bold]{state.provider}[/]")
+        console.print(f"  model: {state.model or '(기본값)'}")
+        return
+    if arg in PROVIDERS:
+        state.provider = arg
+        state.model = None
+        console.print(f"  provider -> [bold]{arg}[/]")
+    else:
+        state.model = arg
+        console.print(f"  model -> [bold]{arg}[/]")
+def _cmdClear(_arg: str, state: _ChatState, console) -> None:
+    state.history.clear()
+    state.sessionId = None
+    console.print("  [dim]대화 기록이 초기화되었습니다.[/]")
+def _cmdSuggest(_arg: str, state: _ChatState, console) -> None:
+    if state.company is None:
+        console.print("  [dim]종목을 먼저 로드하세요. /company 삼성전자[/]")
+        return
+    from dartlab.ai.conversation.suggestions import suggestQuestions
+    questions = suggestQuestions(state.company)
+    for i, q in enumerate(questions, 1):
+        console.print(f"  [cyan]{i}.[/] {q}")
+def _cmdStatus(_arg: str, state: _ChatState, console) -> None:
+    console.print(f"  provider: [bold]{state.provider}[/]")
+    console.print(f"  model: {state.model or '(기본값)'}")
+    if state.company:
+        console.print(f"  company: [bold]{state.company.corpName}[/] ({state.stockCode})")
+    else:
+        console.print("  company: (없음)")
+    console.print(f"  history: {len(state.history)}개 메시지")
+# ---------------------------------------------------------------------------
+# Session persistence
+# ---------------------------------------------------------------------------
+def _saveMessage(state: _ChatState, role: str, content: str) -> None:
+    try:
+        from dartlab.cli.services.history import add_message, create_session
+        if state.sessionId is None:
+            stockCode = state.stockCode or "__no_company__"
+            state.sessionId = create_session(stockCode)
+        add_message(state.sessionId, role, content)
+    except (OSError, ImportError):
+        pass
+def _resumeSession(state: _ChatState, console) -> None:
+    try:
+        from dartlab.cli.services.history import get_latest_session, get_messages
+        sessionId = get_latest_session(state.stockCode)
+        if sessionId:
+            state.sessionId = sessionId
+            state.history = get_messages(sessionId)
+            console.print(f"  [dim]이전 대화 이어가기 (메��지 {len(state.history)}개)[/]")
+    except (OSError, ImportError):
+        pass
+# ---------------------------------------------------------------------------
+# Welcome
+# ---------------------------------------------------------------------------
+def _printWelcome(state: _ChatState, console) -> None:
+    console.print()
+    console.print("  [bold cyan]DartLab Chat[/]  --  대화형 AI 기업 분석")
+    providerLine = f"  [dim]provider: {state.provider}"
+    if state.model:
+        providerLine += f" / {state.model}"
+    providerLine += "[/]"
+    console.print(providerLine)
+    console.print()
+    if state.company:
+        console.print(f"  [bold]{state.company.corpName}[/] ({state.stockCode})")
+        try:
+            from dartlab.ai.conversation.suggestions import suggestQuestions
+            questions = suggestQuestions(state.company)
+            if questions:
+                console.print()
+                console.print("  [dim]추천 질문:[/]")
+                for q in questions[:4]:
+                    console.print(f"    [dim]-[/] {q}")
+        except (ImportError, AttributeError):
+            pass
+    else:
+        console.print("  [dim]종목 없이 시작합니다. 질문에 종목명을 포함하거나 /company 명령을 사용하세요.[/]")
+    console.print()
+    console.print("  [dim]/help 사용법  |  /quit 종료  |  Ctrl+C 입력 취소[/]")
+    console.print()

src/dartlab/cli/commands/collect.py CHANGED Viewed

@@ -119,6 +119,20 @@ def configure_parser(subparsers) -> None:
         action="store_true",
         help="누락 공시만 증분 수집 (DART)",
     )
     # EDGAR 전용
     parser.add_argument(
         "--tier",
@@ -139,6 +153,10 @@ def run(args) -> int:
     if source == "edgar":
         return _runEdgar(console, args)
     # --- DART ---
     if getattr(args, "check", False):
         return _runCheck(console, args)
@@ -178,12 +196,45 @@ def _printHelp(console) -> None:
     console.print("  dartlab collect --batch             전체 상장 배치 수집")
     console.print("  dartlab collect --stats             수집 현황")
     console.print()
     console.print("  [bold]EDGAR[/] (ticker = 영문 → 자동 감지):")
     console.print("  dartlab collect AAPL MSFT           지정 ticker 수집")
     console.print("  dartlab collect --tier sp500        S&P 500 전체 수집")
     console.print("  dartlab collect --tier sp500 --limit 10  10개만 테스트")
 # ── EDGAR ─────────────────────────────────────────────

         action="store_true",
         help="누락 공시만 증분 수집 (DART)",
     )
+    # scan 프리빌드
+    parser.add_argument(
+        "--scan",
+        nargs="?",
+        const="all",
+        default=None,
+        help="전종목 scan 프리빌드 (all/changes/finance/report)",
+    )
+    parser.add_argument(
+        "--since-year",
+        type=int,
+        default=2021,
+        help="scan 프리빌드 시작 연도 (기본 2021)",
+    )
     # EDGAR 전용
     parser.add_argument(
         "--tier",
     if source == "edgar":
         return _runEdgar(console, args)
+    # --- scan 프리빌드 ---
+    if getattr(args, "scan", None):
+        return _runScan(console, args)
     # --- DART ---
     if getattr(args, "check", False):
         return _runCheck(console, args)
     console.print("  dartlab collect --batch             전체 상장 배치 수집")
     console.print("  dartlab collect --stats             수집 현황")
     console.print()
+    console.print("  [bold]scan 프리빌드[/]:")
+    console.print("  dartlab collect --scan              전종목 횡단분석 프리빌드 (changes+finance+report)")
+    console.print("  dartlab collect --scan changes      changes만 프리빌드")
+    console.print("  dartlab collect --scan finance      finance만 프리빌드")
+    console.print("  dartlab collect --scan report       report만 프리빌드")
+    console.print()
     console.print("  [bold]EDGAR[/] (ticker = 영문 → 자동 감지):")
     console.print("  dartlab collect AAPL MSFT           지정 ticker 수집")
     console.print("  dartlab collect --tier sp500        S&P 500 전체 수집")
     console.print("  dartlab collect --tier sp500 --limit 10  10개만 테스트")
+# ── scan 프리빌드 ──────────────────────────────────────
+def _runScan(console, args) -> int:
+    """전종목 scan 프리빌드 실행."""
+    from dartlab.market.scan.builder import buildScan, buildChanges, buildFinance, buildReport
+    target = getattr(args, "scan", "all")
+    sinceYear = getattr(args, "since_year", 2021)
+    console.print(f"[bold]scan 프리빌드[/] target={target}, sinceYear={sinceYear}")
+    if target == "all":
+        buildScan(sinceYear=sinceYear, verbose=True)
+    elif target == "changes":
+        buildChanges(sinceYear=sinceYear, verbose=True)
+    elif target == "finance":
+        buildFinance(sinceYear=sinceYear, verbose=True)
+    elif target == "report":
+        buildReport(sinceYear=sinceYear, verbose=True)
+    else:
+        console.print(f"[red]알 수 없는 scan 타겟: {target}[/]")
+        return 1
+    return 0
 # ── EDGAR ─────────────────────────────────────────────

src/dartlab/cli/parser.py CHANGED Viewed

@@ -19,6 +19,7 @@ COMMAND_SPECS = (
     CommandSpec("modules", "dartlab.cli.commands.modules"),
     # AI / 내보내기
     CommandSpec("ask", "dartlab.cli.commands.ask"),
     CommandSpec("report", "dartlab.cli.commands.report"),
     CommandSpec("excel", "dartlab.cli.commands.excel"),
     # 분석

     CommandSpec("modules", "dartlab.cli.commands.modules"),
     # AI / 내보내기
     CommandSpec("ask", "dartlab.cli.commands.ask"),
+    CommandSpec("chat", "dartlab.cli.commands.chat"),
     CommandSpec("report", "dartlab.cli.commands.report"),
     CommandSpec("excel", "dartlab.cli.commands.excel"),
     # 분석

src/dartlab/core/dataConfig.py CHANGED Viewed

@@ -22,6 +22,10 @@ DATA_RELEASES: dict[str, dict] = {
         "dir": "dart/report",
         "label": "정기보고서 데이터",
     },
     "edgarDocs": {
         "dir": "edgar/docs",
         "label": "SEC EDGAR 공시 문서 데이터",

         "dir": "dart/report",
         "label": "정기보고서 데이터",
     },
+    "scan": {
+        "dir": "dart/scan",
+        "label": "전종목 횡단분석 프리빌드 데이터",
+    },
     "edgarDocs": {
         "dir": "edgar/docs",
         "label": "SEC EDGAR 공시 문서 데이터",

src/dartlab/core/dataLoader.py CHANGED Viewed

@@ -300,11 +300,13 @@ def downloadAll(category: str = "docs", *, forceUpdate: bool = False) -> None:
     lastErr = None
     for attempt in range(_HF_MAX_RETRIES):
         try:
             snapshot_download(
                 repo_id=HF_REPO,
                 repo_type="dataset",
                 local_dir=str(localDir),
-                allow_patterns=f"{hfDir}/*.parquet",
                 force_download=forceUpdate if attempt == 0 else False,
             )
             break
@@ -320,7 +322,8 @@ def downloadAll(category: str = "docs", *, forceUpdate: bool = False) -> None:
             f"마지막 에러: {lastErr}"
         )
-    count = len(list(dataDir.glob("*.parquet")))
     emit("download_all:hf_done", label=label, count=count, dataDir=str(dataDir))

     lastErr = None
     for attempt in range(_HF_MAX_RETRIES):
         try:
+            # scan은 하위 폴더(report/)도 포함하므로 ** 패턴 사용
+            pattern = f"{hfDir}/**/*.parquet" if category == "scan" else f"{hfDir}/*.parquet"
             snapshot_download(
                 repo_id=HF_REPO,
                 repo_type="dataset",
                 local_dir=str(localDir),
+                allow_patterns=pattern,
                 force_download=forceUpdate if attempt == 0 else False,
             )
             break
             f"마지막 에러: {lastErr}"
         )
+    globPattern = "**/*.parquet" if category == "scan" else "*.parquet"
+    count = len(list(dataDir.glob(globPattern)))
     emit("download_all:hf_done", label=label, count=count, dataDir=str(dataDir))

src/dartlab/market/_helpers.py CHANGED Viewed

@@ -10,11 +10,25 @@ import polars as pl
 def scan_parquets(api_type: str, keep_cols: list[str]) -> pl.DataFrame:
     """report parquet에서 특정 apiType만 LazyFrame 스캔.
-    keep_cols 중 실제 존재하는 컬럼만 선택하며, 핵심 컬럼(meta 제외)이
-    하나도 없는 parquet는 건너뛴다.  파일 간 스키마가 다르면 null 패딩으로 통합.
     """
     from dartlab.core.dataLoader import _dataDir
     report_dir = Path(_dataDir("report"))
     parquet_files = sorted(report_dir.glob("*.parquet"))
@@ -121,6 +135,55 @@ def parse_date_year(s) -> int | None:
     return None
 def scan_finance_parquets(
     statement: str,
     account_ids: set[str],
@@ -130,16 +193,26 @@ def scan_finance_parquets(
 ) -> dict[str, float]:
     """finance parquet 전수 스캔 → {종목코드: 값}.
-    statement: "BS", "IS", "CIS" 등
-    account_ids/account_nms: 매칭 대상
     """
     from dartlab.core.dataLoader import _dataDir
     finance_dir = Path(_dataDir("finance"))
     parquet_files = sorted(finance_dir.glob("*.parquet"))
     result: dict[str, float] = {}
-    sj_divs = [statement] if statement != "IS" else ["IS", "CIS"]
     for pf in parquet_files:
         code = pf.stem
         try:

 def scan_parquets(api_type: str, keep_cols: list[str]) -> pl.DataFrame:
     """report parquet에서 특정 apiType만 LazyFrame 스캔.
+    scan/report/{apiType}.parquet 프리빌드가 있으면 단일 파일에서 즉시 로드.
+    없으면 종목별 parquet 순회 (fallback).
     """
     from dartlab.core.dataLoader import _dataDir
+    # 1순위: 프리빌드 scan parquet
+    scan_path = Path(_dataDir("scan")) / "report" / f"{api_type}.parquet"
+    if scan_path.exists():
+        try:
+            lf = pl.scan_parquet(str(scan_path))
+            schema_names = lf.collect_schema().names()
+            available = [c for c in keep_cols if c in schema_names]
+            non_meta = [c for c in available if c not in ("stockCode", "year", "quarter")]
+            if non_meta:
+                return lf.select(available).collect()
+        except (pl.exceptions.PolarsError, OSError):
+            pass  # fallback to per-file scan
+    # 2순위: 종목별 순회 (fallback)
     report_dir = Path(_dataDir("report"))
     parquet_files = sorted(report_dir.glob("*.parquet"))
     return None
+def _scanFinanceFromMerged(
+    scanPath: Path,
+    sjDivs: list[str],
+    accountIds: set[str],
+    accountNms: set[str],
+    amountCol: str,
+) -> dict[str, float]:
+    """합산 finance parquet에서 종목별 최신 연도 값 추출."""
+    scCol = "stockCode" if "stockCode" in pl.scan_parquet(str(scanPath)).collect_schema().names() else "stock_code"
+    target = (
+        pl.scan_parquet(str(scanPath))
+        .filter(
+            pl.col("sj_div").is_in(sjDivs)
+            & (pl.col("fs_nm").str.contains("연결") | pl.col("fs_nm").str.contains("재무제표"))
+        )
+        .collect()
+    )
+    if target.is_empty() or "account_id" not in target.columns:
+        return {}
+    # 연결 우선
+    cfs = target.filter(pl.col("fs_nm").str.contains("연결"))
+    target = cfs if not cfs.is_empty() else target
+    # 종목별 최신 연도만
+    latestYear = (
+        target.group_by(scCol)
+        .agg(pl.col("bsns_year").max().alias("_maxYear"))
+    )
+    target = target.join(latestYear, on=scCol).filter(pl.col("bsns_year") == pl.col("_maxYear")).drop("_maxYear")
+    # 계정 매칭
+    matched = target.filter(
+        pl.col("account_id").is_in(list(accountIds)) | pl.col("account_nm").is_in(list(accountNms))
+    )
+    result: dict[str, float] = {}
+    for row in matched.iter_rows(named=True):
+        code = row.get(scCol, "")
+        if code and code not in result:
+            val = parse_num(row.get(amountCol))
+            if val is not None:
+                result[code] = val
+    return result
 def scan_finance_parquets(
     statement: str,
     account_ids: set[str],
 ) -> dict[str, float]:
     """finance parquet 전수 스캔 → {종목코드: 값}.
+    scan/finance.parquet 프리빌드가 있으면 단일 파일에서 즉시 필터.
+    없으면 종목별 parquet 순회 (fallback).
     """
     from dartlab.core.dataLoader import _dataDir
+    sj_divs = [statement] if statement != "IS" else ["IS", "CIS"]
+    # 1순위: 프리빌드 scan parquet
+    scan_path = Path(_dataDir("scan")) / "finance.parquet"
+    if scan_path.exists():
+        try:
+            return _scanFinanceFromMerged(scan_path, sj_divs, account_ids, account_nms, amount_col)
+        except (pl.exceptions.PolarsError, OSError):
+            pass  # fallback
+    # 2순위: 종목별 순회 (fallback)
     finance_dir = Path(_dataDir("finance"))
     parquet_files = sorted(finance_dir.glob("*.parquet"))
     result: dict[str, float] = {}
     for pf in parquet_files:
         code = pf.stem
         try:

src/dartlab/market/scan/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""전종목 횡단분석 프리빌드 — changes + finance + report 합산 parquet."""
+from dartlab.market.scan.builder import buildScan, buildChanges, buildFinance, buildReport
+__all__ = ["buildScan", "buildChanges", "buildFinance", "buildReport"]

src/dartlab/market/scan/builder.py ADDED Viewed

	@@ -0,0 +1,436 @@

+"""전종목 scan 프리빌드 빌더.
+docs → changes, finance → 합산, report → apiType별 분리.
+실험 014/015에서 검증된 로직을 프로덕션화.
+배치를 중간 파일로 쓰고 마지막에 합산하여 segfault 방지.
+"""
+from __future__ import annotations
+import shutil
+import time
+from pathlib import Path
+import polars as pl
+# The 10 apiTypes actually used by the scanner.
+SCAN_API_TYPES = [
+    "majorHolder",
+    "executive",
+    "employee",
+    "executivePayAllTotal",
+    "executivePayIndividual",
+    "auditOpinion",
+    "dividend",
+    "treasuryStock",
+    "capitalChange",
+    "corporateBond",
+]
+# Number of per-stock results accumulated before a batch file is written.
+_BATCH = 200
+def _scanDir() -> Path:
+    """scan 출력 디렉토리."""
+    from dartlab.core.dataLoader import _dataDir
+    return Path(_dataDir("scan"))
+def _docsDir() -> Path:
+    from dartlab.core.dataLoader import _dataDir
+    return Path(_dataDir("docs"))
+def _financeDir() -> Path:
+    from dartlab.core.dataLoader import _dataDir
+    return Path(_dataDir("finance"))
+def _reportDir() -> Path:
+    from dartlab.core.dataLoader import _dataDir
+    return Path(_dataDir("report"))
+def _log(msg: str) -> None:
+    """Write a progress message to standard output."""
+    print(msg)
+def _mergeBatchFiles(batchDir: Path, outputPath: Path, *, how: str = "vertical") -> int:
+    """배치 파일들을 읽어서 1개로 합산. 반환: 총 행수."""
+    batchFiles = sorted(batchDir.glob("batch_*.parquet"))
+    if not batchFiles:
+        return 0
+    parts = [pl.read_parquet(str(f)) for f in batchFiles]
+    merged = pl.concat(parts, how=how)
+    merged.write_parquet(str(outputPath), compression="zstd")
+    totalRows = merged.height
+    del merged, parts
+    return totalRows
+# ── changes ──────────────────────────────────────────────────────────
+def _buildRawChanges(parquetPath: Path, stockCode: str, sinceYear: int = 2021) -> pl.DataFrame | None:
+    """Derive section-level year-over-year changes from one raw docs parquet.
+    Each section, identified by (section_order, section_title), is compared
+    with the previous row of the same section after sorting by year.
+    Changed sections are classified as appeared / disappeared / numeric /
+    structural / wording. Returns None when the file cannot be read, lacks
+    the required columns, has fewer than two rows in range, or yields no changes.
+    """
+    try:
+        raw = pl.read_parquet(str(parquetPath))
+    except (pl.exceptions.PolarsError, OSError):
+        return None
+    needed = {"year", "section_order", "section_title", "section_content"}
+    if not needed.issubset(set(raw.columns)):
+        return None
+    # Keep one extra year before sinceYear so the first requested year has a baseline.
+    raw = raw.filter(pl.col("year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear - 1)
+    if raw.height < 2:
+        return None
+    work = raw.select(["year", "section_order", "section_title", "section_content"])
+    # Sort so that shift(1) within a section yields the previous row of that section.
+    work = work.sort(["section_order", "section_title", "year"])
+    work = work.with_columns([
+        pl.col("year").shift(1).over(["section_order", "section_title"]).alias("_prevYear"),
+        pl.col("section_content").shift(1).over(["section_order", "section_title"]).alias("_prevContent"),
+    ])
+    # Content hashes for the comparison, character counts for sizes, first 200 characters as preview.
+    work = work.with_columns([
+        pl.col("section_content").hash().alias("_hash"),
+        pl.col("_prevContent").hash().alias("_prevHash"),
+        pl.col("section_content").str.len_chars().alias("sizeB"),
+        pl.col("_prevContent").str.len_chars().alias("sizeA"),
+        pl.col("section_content").str.slice(0, 200).alias("preview"),
+    ])
+    # A change needs a previous row, at least one non-null side, and either
+    # differing hashes or one side missing.
+    changes = work.filter(
+        pl.col("_prevYear").is_not_null()
+        & ~(pl.col("section_content").is_null() & pl.col("_prevContent").is_null())
+        & (
+            (pl.col("_hash") != pl.col("_prevHash"))
+            | pl.col("section_content").is_null()
+            | pl.col("_prevContent").is_null()
+        )
+    )
+    if changes.height == 0:
+        return None
+    # Replace runs of digits, commas and dots with "N" to detect number-only changes.
+    numPattern = r"[\d,.]+"
+    changes = changes.with_columns([
+        pl.col("section_content").str.replace_all(numPattern, "N").alias("_stripped"),
+        pl.col("_prevContent").str.replace_all(numPattern, "N").alias("_prevStripped"),
+    ])
+    # Classification order matters: the first matching branch wins.
+    # "structural" means the size changed by more than 50% of the previous size.
+    changes = changes.with_columns(
+        pl.when(pl.col("_prevContent").is_null())
+        .then(pl.lit("appeared"))
+        .when(pl.col("section_content").is_null())
+        .then(pl.lit("disappeared"))
+        .when(pl.col("_stripped") == pl.col("_prevStripped"))
+        .then(pl.lit("numeric"))
+        .when(
+            (pl.col("sizeA") > 0)
+            & ((pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).abs().cast(pl.Float64)
+               / pl.col("sizeA").cast(pl.Float64) > 0.5)
+        )
+        .then(pl.lit("structural"))
+        .otherwise(pl.lit("wording"))
+        .alias("changeType")
+    )
+    # Drop rows of the baseline year; only changes landing in sinceYear or later are reported.
+    changes = changes.filter(pl.col("year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear)
+    return changes.select([
+        pl.col("_prevYear").alias("fromPeriod"),
+        pl.col("year").alias("toPeriod"),
+        pl.col("section_title").alias("sectionTitle"),
+        pl.col("changeType"),
+        pl.col("sizeA"),
+        pl.col("sizeB"),
+        (pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).alias("sizeDelta"),
+        pl.col("preview"),
+        pl.lit(stockCode).alias("stockCode"),
+    ])
+def buildChanges(*, sinceYear: int = 2021, verbose: bool = True) -> Path | None:
+    """docs → changes 프리빌드. 반환: 출력 parquet 경로."""
+    docsDir = _docsDir()
+    outDir = _scanDir()
+    outDir.mkdir(parents=True, exist_ok=True)
+    outputPath = outDir / "changes.parquet"
+    batchDir = outDir / "_tmp_changes"
+    batchDir.mkdir(parents=True, exist_ok=True)
+    allFiles = sorted(docsDir.glob("*.parquet"))
+    if not allFiles:
+        if verbose:
+            _log("docs parquet 없음 — changes 빌드 건너뜀")
+        return None
+    if verbose:
+        _log(f"[changes] {len(allFiles)}종목, sinceYear={sinceYear}")
+    t0 = time.perf_counter()
+    batchChunks: list[pl.DataFrame] = []
+    success = 0
+    failed = 0
+    totalRows = 0
+    batchIdx = 0
+    for i, pf in enumerate(allFiles):
+        result = _buildRawChanges(pf, pf.stem, sinceYear)
+        if result is not None and result.height > 0:
+            batchChunks.append(result)
+            totalRows += result.height
+            success += 1
+        else:
+            failed += 1
+        if len(batchChunks) >= _BATCH or i == len(allFiles) - 1:
+            if batchChunks:
+                batch = pl.concat(batchChunks)
+                batch.write_parquet(str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd")
+                del batch
+                batchChunks = []
+                batchIdx += 1
+        if verbose and (i + 1) % 500 == 0:
+            _log(f"  [{i+1}/{len(allFiles)}] {success}ok {failed}fail {totalRows:,}rows {time.perf_counter()-t0:.0f}s")
+    if batchIdx == 0:
+        if verbose:
+            _log("  changes 결과 없음")
+        shutil.rmtree(batchDir, ignore_errors=True)
+        return None
+    _mergeBatchFiles(batchDir, outputPath)
+    shutil.rmtree(batchDir, ignore_errors=True)
+    elapsed = time.perf_counter() - t0
+    diskMb = outputPath.stat().st_size / 1024 / 1024
+    if verbose:
+        _log(f"  완료: {success}종목, {totalRows:,}행, {diskMb:.1f}MB, {elapsed:.0f}초")
+    return outputPath
+# ── finance ──────────────────────────────────────────────────────────
+def buildFinance(*, sinceYear: int = 2021, verbose: bool = True) -> Path | None:
+    """finance 전종목 합산. 반환: 출력 parquet 경로."""
+    finDir = _financeDir()
+    outDir = _scanDir()
+    outDir.mkdir(parents=True, exist_ok=True)
+    outputPath = outDir / "finance.parquet"
+    batchDir = outDir / "_tmp_finance"
+    batchDir.mkdir(parents=True, exist_ok=True)
+    allFiles = sorted(finDir.glob("*.parquet"))
+    if not allFiles:
+        if verbose:
+            _log("finance parquet 없음 — 빌드 건너뜀")
+        return None
+    if verbose:
+        _log(f"[finance] {len(allFiles)}종목, sinceYear={sinceYear}")
+    t0 = time.perf_counter()
+    batchChunks: list[pl.DataFrame] = []
+    success = 0
+    totalRows = 0
+    batchIdx = 0
+    for i, pf in enumerate(allFiles):
+        try:
+            df = pl.read_parquet(str(pf))
+        except (pl.exceptions.PolarsError, OSError):
+            continue
+        if "stockCode" not in df.columns and "stock_code" not in df.columns:
+            df = df.with_columns(pl.lit(pf.stem).alias("stockCode"))
+        elif "stock_code" in df.columns and "stockCode" not in df.columns:
+            df = df.rename({"stock_code": "stockCode"})
+        if "bsns_year" in df.columns:
+            df = df.filter(
+                pl.col("bsns_year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear
+            )
+        if df.height == 0:
+            continue
+        batchChunks.append(df)
+        totalRows += df.height
+        success += 1
+        if len(batchChunks) >= _BATCH or i == len(allFiles) - 1:
+            if batchChunks:
+                batch = pl.concat(batchChunks, how="diagonal_relaxed")
+                batch.write_parquet(str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd")
+                del batch
+                batchChunks = []
+                batchIdx += 1
+        if verbose and (i + 1) % 500 == 0:
+            _log(f"  [{i+1}/{len(allFiles)}] {success}ok {totalRows:,}rows {time.perf_counter()-t0:.0f}s")
+    if batchIdx == 0:
+        if verbose:
+            _log("  finance 결과 없음")
+        shutil.rmtree(batchDir, ignore_errors=True)
+        return None
+    _mergeBatchFiles(batchDir, outputPath, how="diagonal_relaxed")
+    shutil.rmtree(batchDir, ignore_errors=True)
+    elapsed = time.perf_counter() - t0
+    diskMb = outputPath.stat().st_size / 1024 / 1024
+    if verbose:
+        _log(f"  완료: {success}종목, {totalRows:,}행, {diskMb:.1f}MB, {elapsed:.0f}초")
+    return outputPath
+# ── report ───────────────────────────────────────────────────────────
+def buildReport(*, sinceYear: int = 2021, verbose: bool = True) -> list[Path]:
+    """report → apiType별 분리 parquet. 반환: 생성된 파일 경로 목록."""
+    repDir = _reportDir()
+    outDir = _scanDir() / "report"
+    outDir.mkdir(parents=True, exist_ok=True)
+    allFiles = sorted(repDir.glob("*.parquet"))
+    if not allFiles:
+        if verbose:
+            _log("report parquet 없음 — 빌드 건너뜀")
+        return []
+    if verbose:
+        _log(f"[report] {len(allFiles)}종목 → apiType별 분리")
+    t0 = time.perf_counter()
+    # apiType별 배치 디렉토리
+    apiBatchDirs: dict[str, Path] = {}
+    apiBatchIdx: dict[str, int] = {}
+    apiChunks: dict[str, list[pl.DataFrame]] = {}
+    apiRows: dict[str, int] = {}
+    for at in SCAN_API_TYPES:
+        bd = outDir / f"_tmp_{at}"
+        bd.mkdir(parents=True, exist_ok=True)
+        apiBatchDirs[at] = bd
+        apiBatchIdx[at] = 0
+        apiChunks[at] = []
+        apiRows[at] = 0
+    processed = 0
+    for i, pf in enumerate(allFiles):
+        try:
+            df = pl.read_parquet(str(pf))
+        except (pl.exceptions.PolarsError, OSError):
+            continue
+        if "apiType" not in df.columns:
+            continue
+        if "stockCode" not in df.columns and "stock_code" not in df.columns:
+            df = df.with_columns(pl.lit(pf.stem).alias("stockCode"))
+        if "year" in df.columns:
+            df = df.with_columns(
+                pl.col("year").cast(pl.Utf8).str.to_integer(strict=False).alias("_yearInt")
+            )
+            df = df.filter(
+                pl.col("_yearInt").is_null() | (pl.col("_yearInt") >= sinceYear)
+            ).drop("_yearInt")
+        processed += 1
+        for apiType in SCAN_API_TYPES:
+            sub = df.filter(pl.col("apiType") == apiType)
+            if sub.height > 0:
+                apiChunks[apiType].append(sub)
+                apiRows[apiType] += sub.height
+                if len(apiChunks[apiType]) >= _BATCH:
+                    batch = pl.concat(apiChunks[apiType], how="diagonal_relaxed")
+                    idx = apiBatchIdx[apiType]
+                    batch.write_parquet(
+                        str(apiBatchDirs[apiType] / f"batch_{idx:03d}.parquet"),
+                        compression="zstd",
+                    )
+                    del batch
+                    apiChunks[apiType] = []
+                    apiBatchIdx[apiType] = idx + 1
+        if verbose and (i + 1) % 500 == 0:
+            _log(f"  [{i+1}/{len(allFiles)}] {processed}ok {time.perf_counter()-t0:.0f}s")
+    # 남은 청크 flush + 합산
+    outputs: list[Path] = []
+    for apiType in SCAN_API_TYPES:
+        # 남은 청크 쓰기
+        if apiChunks[apiType]:
+            batch = pl.concat(apiChunks[apiType], how="diagonal_relaxed")
+            idx = apiBatchIdx[apiType]
+            batch.write_parquet(
+                str(apiBatchDirs[apiType] / f"batch_{idx:03d}.parquet"),
+                compression="zstd",
+            )
+            del batch
+            apiBatchIdx[apiType] = idx + 1
+        if apiBatchIdx[apiType] == 0:
+            shutil.rmtree(apiBatchDirs[apiType], ignore_errors=True)
+            continue
+        outPath = outDir / f"{apiType}.parquet"
+        _mergeBatchFiles(apiBatchDirs[apiType], outPath, how="diagonal_relaxed")
+        shutil.rmtree(apiBatchDirs[apiType], ignore_errors=True)
+        diskMb = outPath.stat().st_size / 1024 / 1024
+        outputs.append(outPath)
+        if verbose:
+            _log(f"  {apiType}: {apiRows[apiType]:,}행, {diskMb:.1f}MB")
+    elapsed = time.perf_counter() - t0
+    if verbose:
+        _log(f"  report 완료: {len(outputs)}개 apiType, {elapsed:.0f}초")
+    return outputs
+# ── 전체 빌드 ────────────────────────────────────────────────────────
+def buildScan(*, sinceYear: int = 2021, verbose: bool = True) -> dict[str, Path | list[Path] | None]:
+    """changes + finance + report 전체 프리빌드."""
+    if verbose:
+        _log(f"전종목 scan 프리빌드 시작 (sinceYear={sinceYear})")
+        _log("=" * 60)
+    results: dict[str, Path | list[Path] | None] = {}
+    results["changes"] = buildChanges(sinceYear=sinceYear, verbose=verbose)
+    results["finance"] = buildFinance(sinceYear=sinceYear, verbose=verbose)
+    results["report"] = buildReport(sinceYear=sinceYear, verbose=verbose)
+    if verbose:
+        _log("=" * 60)
+        scanDir = _scanDir()
+        if scanDir.exists():
+            totalMb = sum(
+                f.stat().st_size for f in scanDir.rglob("*.parquet")
+            ) / 1024 / 1024
+            _log(f"scan 전체: {totalMb:.1f}MB")
+    return results

src/dartlab/providers/dart/_sections_source.py CHANGED Viewed

@@ -6,6 +6,7 @@ raw DataFrame를 감싸되, 같은 경로에서 freq/semantic 파생표를 바
 from __future__ import annotations
 from typing import TYPE_CHECKING, Any
 import polars as pl
@@ -13,6 +14,9 @@ import polars as pl
 if TYPE_CHECKING:
     from dartlab.providers.dart.company import Company
 class _SectionsSource:
     """sections source-of-truth accessor.
@@ -176,6 +180,38 @@ class _SectionsSource:
             changedOnly=changedOnly,
         )
     def __getattr__(self, name: str) -> Any:
         frame = self.raw
         if frame is None:
@@ -199,6 +235,120 @@ class _SectionsSource:
         return (
             "SectionsSource("
             "shape="
-            f"{frame.shape}, methods=[raw, topics(), outline(), periods(), ordered(), coverage(), freq(), semanticRegistry(), semanticCollisions(), structureRegistry(), structureCollisions(), structureEvents(), structureSummary(), structureChanges()]"
             ")"
         )

 from __future__ import annotations
+import re
 from typing import TYPE_CHECKING, Any
 import polars as pl
 if TYPE_CHECKING:
     from dartlab.providers.dart.company import Company
+_PERIOD_RE = re.compile(r"^\d{4}$")
+_NUM_PATTERN = r"[\d,.]+"
 class _SectionsSource:
     """sections source-of-truth accessor.
             changedOnly=changedOnly,
         )
+    def changes(
+        self,
+        *,
+        topic: str | None = None,
+        fromPeriod: str | None = None,
+        toPeriod: str | None = None,
+    ) -> pl.DataFrame | None:
+        """기간 간 변화 블록 추출 (벡터화).
+        sections wide DataFrame에서 인접 기간 비교로 변화만 추출.
+        5종 유형: appeared, disappeared, numeric, structural, wording.
+        """
+        frame = self.raw
+        if frame is None:
+            return None
+        return _buildChanges(frame, topic=topic, fromPeriod=fromPeriod, toPeriod=toPeriod)
+    def changeSummary(self, *, topN: int = 10) -> pl.DataFrame | None:
+        """topic별 변화 요약 — AI 컨텍스트용."""
+        ch = self.changes()
+        if ch is None or ch.is_empty():
+            return None
+        return (
+            ch.group_by(["topic", "changeType"])
+            .agg(
+                pl.len().alias("count"),
+                pl.col("sizeDelta").mean().round(0).cast(pl.Int64).alias("avgDelta"),
+            )
+            .sort(["topic", "count"], descending=[False, True])
+            .head(topN * 5)
+        )
     def __getattr__(self, name: str) -> Any:
         frame = self.raw
         if frame is None:
         return (
             "SectionsSource("
             "shape="
+            f"{frame.shape}, methods=[raw, topics(), outline(), periods(), ordered(), coverage(), freq(), changes(), changeSummary(), semanticRegistry(), semanticCollisions(), structureRegistry(), structureCollisions(), structureEvents(), structureSummary(), structureChanges()]"
             ")"
         )
+def _buildChanges(
+    sections: pl.DataFrame,
+    *,
+    topic: str | None = None,
+    fromPeriod: str | None = None,
+    toPeriod: str | None = None,
+) -> pl.DataFrame:
+    """sections wide DataFrame → 변화 블록 DataFrame (벡터화).
+    실험 101-010에서 검증된 Polars 벡터화 패턴.
+    0.15초에 22,060행 생성 (Python 루프 대비 12x).
+    """
+    annualCols = sorted(c for c in sections.columns if _PERIOD_RE.match(c))
+    if len(annualCols) < 2:
+        return pl.DataFrame()
+    metaCols = ["topic"]
+    for col in ("textPathKey", "blockType", "blockOrder"):
+        if col in sections.columns:
+            metaCols.append(col)
+    if topic is not None:
+        sections = sections.filter(pl.col("topic") == topic)
+        if sections.is_empty():
+            return pl.DataFrame()
+    work = sections.with_row_index("_row")
+    # wide → long
+    long = work.select(["_row"] + metaCols + annualCols).unpivot(
+        index=["_row"] + metaCols,
+        on=annualCols,
+        variable_name="period",
+        value_name="text",
+    )
+    long = long.with_columns(pl.col("text").cast(pl.Utf8))
+    # hash + len (null 보존)
+    long = long.with_columns(
+        pl.when(pl.col("text").is_not_null())
+        .then(pl.col("text").hash())
+        .otherwise(pl.lit(None, dtype=pl.UInt64))
+        .alias("_hash"),
+        pl.when(pl.col("text").is_not_null())
+        .then(pl.col("text").str.len_chars())
+        .otherwise(pl.lit(None, dtype=pl.UInt32))
+        .alias("_len"),
+        pl.when(pl.col("text").is_not_null())
+        .then(pl.col("text").str.slice(0, 200))
+        .otherwise(pl.lit(None, dtype=pl.Utf8))
+        .alias("preview"),
+    )
+    # 인접 기간 비교
+    long = long.sort(["_row", "period"])
+    long = long.with_columns(
+        pl.col("period").shift(1).over("_row").alias("_prevPeriod"),
+        pl.col("_hash").shift(1).over("_row").alias("_prevHash"),
+        pl.col("_len").shift(1).over("_row").alias("_prevLen"),
+        pl.col("text").shift(1).over("_row").alias("_prevText"),
+    )
+    # 변화 필터
+    changes = long.filter(
+        pl.col("_prevPeriod").is_not_null()
+        & ~(pl.col("text").is_null() & pl.col("_prevText").is_null())
+        & ((pl.col("_hash") != pl.col("_prevHash")) | pl.col("text").is_null() | pl.col("_prevText").is_null())
+    )
+    if changes.is_empty():
+        return pl.DataFrame()
+    # 기간 필터
+    if fromPeriod is not None:
+        changes = changes.filter(pl.col("_prevPeriod") >= fromPeriod)
+    if toPeriod is not None:
+        changes = changes.filter(pl.col("period") <= toPeriod)
+    # 변화 유형 분류
+    changes = changes.with_columns(
+        pl.col("text").str.replace_all(_NUM_PATTERN, "N").alias("_stripped"),
+        pl.col("_prevText").str.replace_all(_NUM_PATTERN, "N").alias("_prevStripped"),
+    )
+    changes = changes.with_columns(
+        pl.when(pl.col("_prevText").is_null())
+        .then(pl.lit("appeared"))
+        .when(pl.col("text").is_null())
+        .then(pl.lit("disappeared"))
+        .when(pl.col("_stripped") == pl.col("_prevStripped"))
+        .then(pl.lit("numeric"))
+        .when(
+            (pl.col("_prevLen") > 0)
+            & (
+                (pl.col("_len").cast(pl.Int64) - pl.col("_prevLen").cast(pl.Int64)).abs().cast(pl.Float64)
+                / pl.col("_prevLen").cast(pl.Float64)
+                > 0.5
+            )
+        )
+        .then(pl.lit("structural"))
+        .otherwise(pl.lit("wording"))
+        .alias("changeType")
+    )
+    # 결과 정리
+    resultCols = ["_prevPeriod", "period", "changeType", "_prevLen", "_len", "preview"] + metaCols
+    renameMap = {"_prevPeriod": "fromPeriod", "period": "toPeriod", "_prevLen": "sizeA", "_len": "sizeB"}
+    result = changes.select(resultCols).rename(renameMap)
+    result = result.with_columns((pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).alias("sizeDelta"))
+    return result

src/dartlab/review/builders.py CHANGED Viewed

@@ -396,21 +396,18 @@ def fundingSourcesBlock(data: dict) -> list:
         )
     blocks.append(MetricBlock(metrics))
-    # 시계열 테이블
     history = data.get("history", [])
     if len(history) >= 2:
-        histRows = []
         for h in history:
-            histRows.append(
-                {
-                    "기간": h["period"],
-                    "내부유보": f"{h['retainedPct']:.0f}%",
-                    "주주자본": f"{h['paidInPct']:.0f}%",
-                    "금융차입": f"{h['finDebtPct']:.0f}%",
-                    "영업조달": f"{h['opFundingPct']:.0f}%",
-                }
-            )
-        blocks.append(TableBlock("조달원 비중 추이", pl.DataFrame(histRows)))
     # 보충 지표 (순차입금/EBITDA, 암묵적 차입금리)
     suppMetrics = []

         )
     blocks.append(MetricBlock(metrics))
+    # 시계열 테이블 (행=항목, 열=기간)
     history = data.get("history", [])
     if len(history) >= 2:
+        cols = {"": ["내부유보", "주주자본", "금융차입", "영업조달"]}
         for h in history:
+            cols[h["period"]] = [
+                f"{h['retainedPct']:.0f}%",
+                f"{h['paidInPct']:.0f}%",
+                f"{h['finDebtPct']:.0f}%",
+                f"{h['opFundingPct']:.0f}%",
+            ]
+        blocks.append(TableBlock("조달원 비중 추이", pl.DataFrame(cols)))
     # 보충 지표 (순차입금/EBITDA, 암묵적 차입금리)
     suppMetrics = []