github-actions[bot] committed on
Commit
6254e2b
·
1 Parent(s): bc7389c

sync from 4c3ef19

Browse files
Files changed (35) hide show
  1. Dockerfile +22 -2
  2. README_PROJECT.md +0 -1108
  3. pyproject.toml +1 -0
  4. src/dartlab/ai/DEV.md +72 -0
  5. src/dartlab/ai/context/builder.py +66 -4
  6. src/dartlab/ai/context/pruning.py +95 -0
  7. src/dartlab/ai/conversation/prompts.py +34 -7
  8. src/dartlab/ai/conversation/templates/analysisPhilosophy.py +57 -0
  9. src/dartlab/ai/eval/batchResults/batch_ollama_20260327_124945.jsonl +35 -0
  10. src/dartlab/ai/eval/batchResults/batch_ollama_20260327_131602.jsonl +4 -0
  11. src/dartlab/ai/eval/batchResults/batch_ollama_20260327_132810.jsonl +11 -0
  12. src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_124945.md +21 -0
  13. src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_131602.md +15 -0
  14. src/dartlab/ai/memory/__init__.py +8 -0
  15. src/dartlab/ai/memory/store.py +154 -0
  16. src/dartlab/ai/memory/summarizer.py +55 -0
  17. src/dartlab/ai/providers/oauth_codex.py +57 -11
  18. src/dartlab/ai/runtime/agent.py +124 -7
  19. src/dartlab/ai/runtime/core.py +40 -2
  20. src/dartlab/ai/runtime/run_modes.py +87 -2
  21. src/dartlab/ai/runtime/scratchpad.py +115 -0
  22. src/dartlab/ai/skills/__init__.py +9 -0
  23. src/dartlab/ai/skills/catalog.py +145 -0
  24. src/dartlab/ai/skills/registry.py +58 -0
  25. src/dartlab/ai/tools/defaults/helpers.py +5 -2
  26. src/dartlab/cli/commands/chat.py +472 -0
  27. src/dartlab/cli/commands/collect.py +51 -0
  28. src/dartlab/cli/parser.py +1 -0
  29. src/dartlab/core/dataConfig.py +4 -0
  30. src/dartlab/core/dataLoader.py +5 -2
  31. src/dartlab/market/_helpers.py +78 -5
  32. src/dartlab/market/scan/__init__.py +5 -0
  33. src/dartlab/market/scan/builder.py +436 -0
  34. src/dartlab/providers/dart/_sections_source.py +151 -1
  35. src/dartlab/review/builders.py +9 -12
Dockerfile CHANGED
@@ -4,14 +4,34 @@ WORKDIR /app
4
 
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
6
  build-essential \
 
 
7
  && rm -rf /var/lib/apt/lists/*
8
 
9
- COPY pyproject.toml README.md ./
 
 
 
 
 
 
 
 
 
 
 
10
  COPY src/ src/
 
 
 
 
11
 
12
- RUN pip install --no-cache-dir -e ".[ai]"
 
 
13
 
14
  ENV SPACE_ID=1
 
15
 
16
  EXPOSE 7860
17
 
 
4
 
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
6
  build-essential \
7
+ libxml2-dev \
8
+ libxslt1-dev \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
+ # 핵심 의존성만 먼저 설치 (wheel 우선, 빌드 실패 방지)
12
+ RUN pip install --no-cache-dir \
13
+ polars \
14
+ beautifulsoup4 lxml \
15
+ httpx requests orjson \
16
+ openpyxl rich plotly \
17
+ prompt-toolkit \
18
+ alive-progress \
19
+ diff-match-patch \
20
+ fastapi uvicorn[standard] sse-starlette msgpack
21
+
22
+ COPY pyproject.toml ./
23
  COPY src/ src/
24
+ RUN touch README.md
25
+
26
+ # --no-deps: 위에서 이미 설치한 의존성 재설치 방지, marimo/mcp 건너뜀
27
+ RUN pip install --no-cache-dir --no-deps -e .
28
 
29
+ # HF Spaces user
30
+ RUN useradd -m -u 1000 user
31
+ USER user
32
 
33
  ENV SPACE_ID=1
34
+ ENV HOME=/home/user
35
 
36
  EXPOSE 7860
37
 
README_PROJECT.md DELETED
@@ -1,1108 +0,0 @@
1
- <div align="center">
2
-
3
- <br>
4
-
5
- <img alt="DartLab" src=".github/assets/logo.png" width="180">
6
-
7
- <h3>DartLab</h3>
8
-
9
- <p><b>One stock code. The whole story.</b></p>
10
- <p>DART + EDGAR filings, structured and comparable — in one line of Python.</p>
11
-
12
- <p>
13
- <a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/v/dartlab?style=for-the-badge&color=ea4647&labelColor=050811&logo=pypi&logoColor=white" alt="PyPI"></a>
14
- <a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/pyversions/dartlab?style=for-the-badge&color=c83232&labelColor=050811&logo=python&logoColor=white" alt="Python"></a>
15
- <a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-94a3b8?style=for-the-badge&labelColor=050811" alt="License"></a>
16
- <a href="https://github.com/eddmpython/dartlab/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/eddmpython/dartlab/ci.yml?branch=master&style=for-the-badge&labelColor=050811&logo=github&logoColor=white&label=CI" alt="CI"></a>
17
- <a href="https://eddmpython.github.io/dartlab/"><img src="https://img.shields.io/badge/Docs-GitHub_Pages-38bdf8?style=for-the-badge&labelColor=050811&logo=github-pages&logoColor=white" alt="Docs"></a>
18
- <a href="https://eddmpython.github.io/dartlab/blog/"><img src="https://img.shields.io/badge/Blog-120%2B_Articles-fbbf24?style=for-the-badge&labelColor=050811&logo=rss&logoColor=white" alt="Blog"></a>
19
- </p>
20
-
21
- <p>
22
- <a href="https://eddmpython.github.io/dartlab/">Docs</a> · <a href="https://eddmpython.github.io/dartlab/blog/">Blog</a> · <a href="https://huggingface.co/spaces/eddmpython/dartlab">Live Demo</a> · <a href="notebooks/marimo/">Marimo Notebooks</a> · <a href="https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb">Open in Colab</a> · <a href="README_KR.md">한국어</a> · <a href="https://buymeacoffee.com/eddmpython">Sponsor</a>
23
- </p>
24
-
25
- <p>
26
- <a href="https://huggingface.co/datasets/eddmpython/dartlab-data"><img src="https://img.shields.io/badge/Data-HuggingFace-ffd21e?style=for-the-badge&labelColor=050811&logo=huggingface&logoColor=white" alt="HuggingFace Data"></a>
27
- </p>
28
-
29
- </div>
30
-
31
- > **Note:** DartLab is under active development. APIs may change between versions, and documentation may lag behind the latest code.
32
-
33
- ## Install
34
-
35
- Requires **Python 3.12+**.
36
-
37
- ```bash
38
- # Core — financial statements, sections, Company
39
- uv add dartlab
40
-
41
- # or with pip
42
- pip install dartlab
43
- ```
44
-
45
- ### Optional Extras
46
-
47
- Install only what you need:
48
-
49
- ```bash
50
- uv add "dartlab[ai]" # web UI, server, streaming (FastAPI + uvicorn)
51
- uv add "dartlab[llm]" # LLM analysis (OpenAI)
52
- uv add "dartlab[charts]" # Plotly charts, network graphs (plotly + networkx + scipy)
53
- uv add "dartlab[mcp]" # MCP server for Claude Desktop / Code / Cursor
54
- uv add "dartlab[channel]" # web UI + cloudflared tunnel sharing
55
- uv add "dartlab[channel-ngrok]" # web UI + ngrok tunnel sharing
56
- uv add "dartlab[channel-full]" # all channels + Telegram / Slack / Discord bots
57
- uv add "dartlab[all]" # everything above (except channel bots)
58
- ```
59
-
60
- **Common combinations:**
61
-
62
- ```bash
63
- # financial analysis + AI chat
64
- uv add "dartlab[ai,llm]"
65
-
66
- # full analysis suite — charts, AI, LLM
67
- uv add "dartlab[ai,llm,charts]"
68
-
69
- # share analysis with team via tunnel
70
- uv add "dartlab[channel]"
71
- ```
72
-
73
- ### From Source
74
-
75
- ```bash
76
- git clone https://github.com/eddmpython/dartlab.git
77
- cd dartlab && uv pip install -e ".[all]"
78
-
79
- # or with pip
80
- pip install -e ".[all]"
81
- ```
82
-
83
- PyPI releases are published only when the core is stable. If you want the latest features (including experimental ones like audit, forecast, valuation), clone the repo directly — but expect occasional breaking changes.
84
-
85
- ### Desktop App (Alpha)
86
-
87
- Skip all installation steps — download the standalone Windows launcher:
88
-
89
- - **[Download DartLab.exe](https://github.com/eddmpython/dartlab-desktop/releases/latest/download/DartLab.exe)** from [dartlab-desktop](https://github.com/eddmpython/dartlab-desktop)
90
- - Also available from the [DartLab landing page](https://eddmpython.github.io/dartlab/)
91
-
92
- One-click launch — no Python, no terminal, no package manager required. The desktop app bundles the web UI with a built-in Python runtime.
93
-
94
- > **Alpha** — functional but incomplete. The desktop app is a Windows-only `.exe` launcher. macOS/Linux are not yet supported.
95
-
96
- ---
97
-
98
- **No data setup required.** When you create a `Company`, dartlab automatically downloads the required data from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data) (DART) or SEC API (EDGAR). The second run loads instantly from local cache.
99
-
100
- ## Quick Start
101
-
102
- Pick any company. Get the whole picture.
103
-
104
- ```python
105
- import dartlab
106
-
107
- # Samsung Electronics — from raw filings to structured data
108
- c = dartlab.Company("005930")
109
- c.sections # every topic, every period, side by side
110
- c.show("businessOverview") # what this company actually does
111
- c.diff("businessOverview") # what changed since last year
112
- c.BS # standardized balance sheet
113
- c.ratios # 47 financial ratios, already calculated
114
-
115
- # Apple — same interface, different country
116
- us = dartlab.Company("AAPL")
117
- us.show("business")
118
- us.ratios
119
-
120
- # No code needed — ask in natural language
121
- dartlab.ask("Analyze Samsung Electronics financial health")
122
- ```
123
-
124
- ## What DartLab Is
125
-
126
- A public company files hundreds of pages every quarter. Inside those pages is everything — revenue trends, risk warnings, management strategy, competitive position. The complete truth about a company, written by the company itself.
127
-
128
- Nobody reads it.
129
-
130
- Not because they don't want to. Because the same information is named differently by every company, structured differently every year, and scattered across formats designed for regulators, not readers. The same "revenue" appears as `ifrs-full_Revenue`, `dart_Revenue`, `SalesRevenue`, or dozens of Korean variations.
131
-
132
- DartLab changes who can access this information. Two engines turn raw filings into one comparable map:
133
-
134
- ### The Two Problems DartLab Solves
135
-
136
- **1. The same company says different things differently every year.**
137
-
138
- Sections horizontalization normalizes every disclosure section into a **topic × period** grid. Different titles across years and industries all resolve to the same canonical topic:
139
-
140
- ```
141
- 2025Q4 2024Q4 2024Q3 2023Q4 …
142
- companyOverview ✓ ✓ ✓ ✓
143
- businessOverview โœ“ โœ“ โœ“ โœ“
144
- productService โœ“ โœ“ โœ“ โœ“
145
- salesOrder ✓ ✓ — ✓
146
- employee โœ“ โœ“ โœ“ โœ“
147
- dividend โœ“ โœ“ โœ“ โœ“
148
- audit โœ“ โœ“ โœ“ โœ“
149
- … (98 canonical topics)
150
- ```
151
-
152
- ```
153
- Before (raw section titles): After (canonical topic):
154
- Samsung "II. 사업의 내용" → businessOverview
155
- Hyundai "II. 사업의 내용 [자동차부문]" → businessOverview
156
- Kakao "2. 사업의 내용" → businessOverview
157
- ```
158
-
159
- The mapping pipeline: **text normalization** → **545 hardcoded title mappings** → **73 regex patterns** → canonical topic. ~95%+ mapping rate across all listed companies. Each cell keeps the full text with heading/body separation, tables, and original evidence. Comparing "what did the company say about risk last year vs. this year" becomes a single `diff()` call.
160
-
161
- **2. Every company names the same number differently.**
162
-
163
- Account standardization normalizes every XBRL account through a 4-step pipeline:
164
-
165
- ```
166
- Raw XBRL account_id
167
- → Strip prefixes (ifrs-full_, dart_, ifrs_, ifrs-smes_)
168
- โ†’ English ID synonyms (59 rules)
169
- โ†’ Korean name synonyms (104 rules)
170
- โ†’ Learned mapping table (34,249 entries)
171
- → Result: revenue, operatingIncome, totalAssets, …
172
- ```
173
-
174
- ```
175
- Before (raw XBRL): After (standardized):
176
- Company account_id account_nm โ†’ snakeId label
177
- Samsung ifrs-full_Revenue 수익(매출액) → revenue 매출액
178
- SK Hynix dart_Revenue 매출액 → revenue 매출액
179
- LG Energy Revenue 매출 → revenue 매출액
180
- ```
181
-
182
- ~97% mapping rate. Cross-company comparison requires zero manual work. Combined with `scanAccount` / `scanRatio`, you can compare a single metric across **2,700+ companies** in one call.
183
-
184
- ### Principles — Accessibility and Reliability
185
-
186
- These two principles govern every public API:
187
-
188
- **Accessibility** — One stock code is all you need. `import dartlab` provides access to every feature. No internal DTOs, no extra imports, no data setup. `Company("005930")` auto-downloads from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data).
189
-
190
- **Reliability** — Numbers are raw originals from DART/EDGAR. Missing data returns `None`, never a guess. `trace(topic)` shows which source was chosen and why. Errors are never swallowed.
191
-
192
- ### Company โ€” The Merged Map
193
-
194
- `Company` uses `sections` as the spine, then overlays stronger data sources:
195
-
196
- ```
197
- Layer What it provides Priority
198
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
199
- docs Section text, tables, evidence Base spine
200
- finance BS, IS, CF, ratios, time series Replaces numeric topics
201
- report 28 structured APIs (DART only) Fills structured topics
202
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
203
- profile Merged view (default for users) Highest
204
- ```
205
-
206
- ```python
207
- c.docs.sections # pure text source (sections spine)
208
- c.finance.BS # authoritative financial statements
209
- c.report.extract() # structured DART API data
210
- c.profile.sections # merged view โ€” what users see by default
211
- ```
212
-
213
- `c.sections` is the merged view. `c.trace("BS")` tells you which source was chosen and why.
214
-
215
- ### Architecture โ€” Layered by Responsibility
216
-
217
- DartLab follows a strict layered architecture where each layer only depends on layers below it:
218
-
219
- ```
220
- L0 core/ Protocols, finance utils, docs utils, registry
221
- L1 providers/ Country-specific data (DART, EDGAR, EDINET)
222
- gather/ External market data (Naver, Yahoo, FRED)
223
- market/ Market-wide scanning (2,700+ companies)
224
- L2 analysis/ Analytical engines (valuation, risk, insights, event study)
225
- L3 ai/ LLM-powered analysis (9 providers)
226
- ```
227
-
228
- Import direction is enforced by CI โ€” no reverse dependencies allowed.
229
-
230
- ### Extensibility โ€” Zero Core Modification
231
-
232
- Adding a new country requires zero changes to core code:
233
-
234
- 1. Create a provider package under `providers/`
235
- 2. Implement `canHandle(code) -> bool` and `priority() -> int`
236
- 3. Register via `entry_points` in `pyproject.toml`
237
-
238
- ```python
239
- dartlab.Company("005930") # โ†’ DART provider (priority 10)
240
- dartlab.Company("AAPL") # โ†’ EDGAR provider (priority 20)
241
- ```
242
-
243
- The facade iterates providers by priority โ€” first match wins. This follows the same pattern as OpenBB's provider system and scikit-learn's estimator registration.
244
-
245
- ## Core Features
246
-
247
- ### Show, Trace, Diff
248
-
249
- ```python
250
- c = dartlab.Company("005930")
251
-
252
- # show โ€” open any topic with source-aware priority
253
- c.show("BS") # โ†’ finance DataFrame
254
- c.show("overview") # โ†’ sections-based text + tables
255
- c.show("dividend") # โ†’ report DataFrame (all quarters)
256
- c.show("IS", period=["2024Q4", "2023Q4"]) # compare specific periods
257
-
258
- # trace โ€” why a topic came from docs, finance, or report
259
- c.trace("BS") # โ†’ {"primarySource": "finance", ...}
260
-
261
- # diff โ€” text change detection (3 modes)
262
- c.diff() # full summary
263
- c.diff("businessOverview") # topic history
264
- c.diff("businessOverview", "2024", "2025") # line-by-line diff
265
- ```
266
-
267
- What the output looks like:
268
-
269
- ```
270
- >>> c.show("businessOverview")
271
- shape: (12, 5)
272
- โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
273
- โ”‚ blockType โ”‚ nodeType โ”‚ 2024 โ”‚ 2023 โ”‚
274
- โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค
275
- โ”‚ text โ”‚ heading โ”‚ 1. ์‚ฐ์—…์˜ ํŠน์„ฑ โ”‚ 1. ์‚ฐ์—…์˜ ํŠน์„ฑ โ”‚
276
- โ”‚ text โ”‚ body โ”‚ ๋ฐ˜๋„์ฒด ์‚ฐ์—…์€ ๊ธฐ์ˆ  ์ง‘์•ฝ์  โ€ฆ โ”‚ ๋ฐ˜๋„์ฒด ์‚ฐ์—…์€ ๊ธฐ์ˆ  ์ง‘์•ฝ์  โ€ฆ โ”‚
277
- โ”‚ table โ”‚ null โ”‚ DataFrame(5ร—3) โ”‚ DataFrame(5ร—3) โ”‚
278
- โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
279
-
280
- >>> c.diff("businessOverview", "2023", "2024")
281
- โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
282
- โ”‚ status โ”‚ text โ”‚
283
- โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค
284
- โ”‚ added โ”‚ AI ๋ฐ˜๋„์ฒด ์ˆ˜์š” ๊ธ‰์ฆ์— ๋”ฐ๋ฅธ HBM ๋งค์ถœ ํ™•๋Œ€ โ€ฆ โ”‚
285
- โ”‚ modified โ”‚ ๋งค์ถœ์•ก 258.9์กฐ์› โ†’ 300.9์กฐ์› โ”‚
286
- โ”‚ removed โ”‚ ๋ฐ˜๋„์ฒด ๋ถ€๋ฌธ ์ˆ˜์ต์„ฑ ์•…ํ™” ์šฐ๋ ค โ€ฆ โ”‚
287
- โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
288
- ```
289
-
290
- ### Finance
291
-
292
- ```python
293
- c.BS # balance sheet (account ร— period, newest first)
294
- c.IS # income statement
295
- c.CF # cash flow
296
- c.ratios # ratio time series DataFrame (6 categories ร— period)
297
- c.finance.ratioSeries # ratio time series across years
298
- c.finance.timeseries # raw account time series
299
- c.annual # annual time series
300
- c.filings() # disclosure document list (Tier 1 Stable)
301
- ```
302
-
303
- All accounts are normalized through the 4-step standardization pipeline โ€” Samsung's `revenue` and LG's `revenue` are the same `snakeId`. Ratios cover 6 categories: profitability, stability, growth, efficiency, cashflow, and valuation.
304
-
305
- ### Market-wide Financial Screening
306
-
307
- Scan a single account or ratio across **all listed companies** in one call โ€” 2,700+ DART firms or 500+ EDGAR firms. Returns a wide Polars DataFrame (rows = companies, columns = periods, newest first).
308
-
309
- ```python
310
- import dartlab
311
-
312
- # scan a single account across all listed companies
313
- dartlab.scanAccount("매출액") # revenue, quarterly standalone
314
- dartlab.scanAccount("operating_profit", annual=True) # annual basis
315
- dartlab.scanAccount("total_assets", market="edgar") # US EDGAR
316
-
317
- # scan a ratio across all listed companies
318
- dartlab.scanRatio("roe") # quarterly ROE for all firms
319
- dartlab.scanRatio("debtRatio", annual=True) # annual debt-to-equity
320
-
321
- # list available ratios (13 ratios: profitability, stability, growth, efficiency, cashflow)
322
- dartlab.scanRatioList()
323
- ```
324
-
325
- Accepts both Korean names (`๋งค์ถœ์•ก`) and English snakeIds (`sales`) โ€” same 4-step normalization as Company finance. Reads 2,700+ parquet files in parallel via ThreadPool, typically completes in ~3 seconds.
326
-
327
- > **Requires pre-downloaded data.** Market-wide functions (`scanAccount`, `screen`, `digest`, etc.) operate on local data โ€” individual `Company()` calls only download one firm at a time. Download all data first:
328
- > ```python
329
- > pip install dartlab[hf]
330
- > dartlab.downloadAll("finance") # ~600 MB, 2,700+ firms
331
- > dartlab.downloadAll("report") # ~320 MB (governance/workforce/capital/debt)
332
- > dartlab.downloadAll("docs") # ~8 GB (digest/signal โ€” large)
333
- > ```
334
-
335
- ## Review โ€” Structured Company Analysis
336
-
337
- > **Experimental** โ€” the review system is under active development. Templates, blocks, and output formats may change between versions.
338
-
339
- DartLab's review system assembles financial data into structured, readable reports.
340
-
341
- ### Templates
342
-
343
- Pre-built block combinations that cover key analysis areas:
344
-
345
- ```python
346
- c = dartlab.Company("005930")
347
-
348
- c.review("수익구조") # revenue structure — segments, growth, concentration
349
- c.review("자금조달") # capital structure — debt, liquidity, interest burden
350
- c.review() # all templates
351
- ```
352
-
353
- ### Block Assembly
354
-
355
- Every review is built from reusable blocks. Get the full block dictionary and assemble your own:
356
-
357
- ```python
358
- from dartlab.review import blocks, Review
359
-
360
- b = blocks(c) # dict of 16 pre-built blocks
361
- list(b.keys()) # โ†’ ["profile", "segmentComposition", "growth", ...]
362
-
363
- # pick what you need
364
- Review([
365
- b["segmentComposition"],
366
- b["growth"],
367
- c.select("IS", ["๋งค์ถœ์•ก"]), # mix with raw data
368
- ])
369
- ```
370
-
371
- ### Reviewer โ€” AI Layer
372
-
373
- Add LLM-powered opinions on top of data blocks. Works with any provider:
374
-
375
- ```python
376
- c.reviewer() # all sections + AI opinion
377
- c.reviewer("์ˆ˜์ต๊ตฌ์กฐ") # single section + AI
378
- c.reviewer(guide="Evaluate from semiconductor cycle perspective") # custom guide
379
- ```
380
-
381
- **Free AI providers** โ€” no paid API key required:
382
-
383
- | Provider | Setup |
384
- |----------|-------|
385
- | Gemini | `dartlab setup gemini` |
386
- | Groq | `dartlab setup groq` |
387
- | Cerebras | `dartlab setup cerebras` |
388
- | Mistral | `dartlab setup mistral` |
389
-
390
- Or use any OpenAI-compatible endpoint:
391
- ```bash
392
- dartlab setup custom --base-url http://localhost:11434/v1 # Ollama local
393
- ```
394
-
395
- ### Customization
396
-
397
- - **Templates**: Pre-defined block combinations (`์ˆ˜์ต๊ตฌ์กฐ`, `์ž๊ธˆ์กฐ๋‹ฌ`)
398
- - **Free assembly**: Mix any blocks + raw DataFrames in `Review([...])`
399
- - **Guide**: Pass `guide="..."` to `c.reviewer()` for domain-specific AI analysis
400
- - **Layout**: `ReviewLayout(indentH1=2, gapAfterH1=1, ...)` for rendering control
401
- - **Render formats**: `review.render("rich" | "html" | "markdown" | "json")`
402
-
403
- See [notebooks/marimo/sampleReview.py](notebooks/marimo/sampleReview.py) for interactive examples.
404
-
405
- ## Additional Features
406
-
407
- > Features below are **beta** or **experimental** โ€” APIs may change. See [stability](docs/stability.md).
408
-
409
- ### Insights (beta)
410
-
411
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
412
-
413
- ```python
414
- c.insights # 10-area analysis
415
- c.insights.grades() # โ†’ {"performance": "A", "profitability": "B", โ€ฆ}
416
- c.insights.performance.grade # โ†’ "A"
417
- c.insights.performance.details # โ†’ ["Revenue growth +8.3%", โ€ฆ]
418
- c.insights.anomalies # โ†’ outliers and red flags
419
-
420
- # distress scorecard โ€” 6-model bankruptcy/fraud prediction
421
- c.insights.distress # Altman Z-Score, Beneish M-Score, Ohlson O-Score,
422
- # Merton Distance-to-Default, Piotroski F-Score, Sloan Ratio
423
- ```
424
-
425
- ### Valuation, Forecast & Simulation
426
-
427
- ```python
428
- dartlab.valuation("005930") # DCF + DDM + relative valuation
429
- dartlab.forecast("005930") # revenue forecast (4-source ensemble)
430
- dartlab.simulation("005930") # scenario simulation (macro presets)
431
-
432
- # also available as Company methods
433
- c.valuation()
434
- c.forecast(horizon=3)
435
- c.simulation(scenarios=["adverse", "rate_hike"])
436
- ```
437
-
438
- Auto-detects currency โ€” KRW for DART companies, USD for EDGAR. Works with both `dartlab.valuation("AAPL")` and `dartlab.valuation("005930")`.
439
-
440
- ### Audit (beta)
441
-
442
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
443
-
444
- ```python
445
- dartlab.audit("005930") # 11 red flag detectors
446
-
447
- # Benford's Law (digit distribution), auditor change (PCAOB AS 3101),
448
- # going concern (ISA 570), internal control (SOX 302/404),
449
- # revenue quality (Dechow & Dichev), Merton default probability, ...
450
- ```
451
-
452
- ### Market Intelligence (beta)
453
-
454
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
455
-
456
- ```python
457
- dartlab.digest() # market-wide disclosure change digest
458
- dartlab.digest(sector="๋ฐ˜๋„์ฒด") # sector filter
459
- dartlab.groupHealth() # group health: network ร— financial ratios
460
- ```
461
-
462
- ### Modules
463
-
464
- DartLab exposes 100+ modules across 6 categories:
465
-
466
- ```bash
467
- dartlab modules # list all modules
468
- dartlab modules --category finance # filter by category
469
- dartlab modules --search dividend # search by keyword
470
- ```
471
-
472
- ```python
473
- c.topics # list all available topics for this company
474
- ```
475
-
476
- Categories: `finance` (statements, ratios), `report` (dividend, governance, audit), `notes` (K-IFRS annotations), `disclosure` (narrative text), `analysis` (insights, rankings), `raw` (original parquets).
477
-
478
- ### Charts & Visualization (beta)
479
-
480
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
481
-
482
- ```python
483
- c = dartlab.Company("005930")
484
-
485
- # one-liner Plotly charts
486
- dartlab.chart.revenue(c).show() # revenue + operating margin combo
487
- dartlab.chart.cashflow(c).show() # operating/investing/financing CF
488
- dartlab.chart.dividend(c).show() # DPS + yield + payout ratio
489
- dartlab.chart.profitability(c).show() # ROE, operating margin, net margin
490
-
491
- # auto-detect all available charts
492
- specs = dartlab.chart.auto_chart(c)
493
- dartlab.chart.chart_from_spec(specs[0]).show()
494
-
495
- # generic charts from any DataFrame
496
- dartlab.chart.line(c.dividend, y=["dps"])
497
- dartlab.chart.bar(df, x="year", y=["revenue", "operating_income"], stacked=True)
498
- ```
499
-
500
- Data tools:
501
-
502
- ```python
503
- dartlab.table.yoy_change(c.dividend, value_cols=["dps"]) # add YoY% columns
504
- dartlab.table.format_korean(c.BS, unit="๋ฐฑ๋งŒ์›") # 1.2์กฐ์›, 350์–ต์›
505
- dartlab.table.summary_stats(c.dividend, value_cols=["dps"]) # mean/CAGR/trend
506
- dartlab.text.extract_keywords(narrative) # frequency-based keywords
507
- dartlab.text.sentiment_indicators(narrative) # positive/negative/risk
508
- ```
509
-
510
- Install chart dependencies: `uv add "dartlab[charts]"`
511
-
512
- ### Network โ€” Affiliate Map (beta)
513
-
514
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
515
-
516
- ```python
517
- c = dartlab.Company("005930")
518
-
519
- # interactive vis.js graph in browser
520
- c.network().show() # ego view (1 hop)
521
- c.network(hops=2).show() # 2-hop neighborhood
522
-
523
- # DataFrame views
524
- c.network("members") # group affiliates
525
- c.network("edges") # investment/shareholder connections
526
- c.network("cycles") # circular ownership paths
527
-
528
- # full market network
529
- dartlab.network().show()
530
- ```
531
-
532
- ### Market Scan (beta)
533
-
534
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
535
-
536
- ```python
537
- c = dartlab.Company("005930")
538
-
539
- # one company โ†’ market-wide
540
- c.governance() # single company
541
- c.governance("all") # full market DataFrame
542
- dartlab.governance() # module-level scan
543
- dartlab.workforce()
544
- dartlab.capital()
545
- dartlab.debt()
546
-
547
- # screening & benchmarking
548
- dartlab.screen() # multi-factor screening
549
- dartlab.benchmark() # peer comparison
550
- dartlab.signal() # change detection signals
551
- ```
552
-
553
- ### Market Data Collection (beta)
554
-
555
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
556
-
557
- The Gather engine collects external market data as **Polars DataFrames** โ€” timeseries by default. Every request goes through automatic fallback chains, circuit breaker isolation, and TTL caching. All methods are synchronous โ€” async parallel execution is handled internally.
558
-
559
- ```python
560
- import dartlab
561
-
562
- # OHLCV timeseries โ€” adjusted prices, 6000+ trading days in a single request
563
- dartlab.price("005930") # KR: 1-year default, Polars DataFrame
564
- dartlab.price("005930", start="2015-01-01") # custom range
565
- dartlab.price("AAPL", market="US") # US via Yahoo Finance chart API
566
- dartlab.price("005930", snapshot=True) # opt-in: current price snapshot
567
-
568
- # supply/demand flow timeseries (KR only)
569
- dartlab.flow("005930") # DataFrame (date, foreignNet, institutionNet, ...)
570
-
571
- # macro indicators โ€” full wide DataFrame
572
- dartlab.macro() # KR 12 indicators (CPI, rates, FX, production, ...)
573
- dartlab.macro("US") # US 25 indicators (GDP, CPI, Fed Funds, S&P500, ...)
574
- dartlab.macro("CPI") # single indicator (auto-detects KR)
575
- dartlab.macro("FEDFUNDS") # single indicator (auto-detects US)
576
-
577
- # consensus, news
578
- dartlab.consensus("005930") # target price & analyst opinion
579
- dartlab.news("์‚ผ์„ฑ์ „์ž") # Google News RSS โ†’ DataFrame
580
- ```
581
-
582
- **How data is collected โ€” don't worry, it's safe:**
583
-
584
- | Source | Data | Method |
585
- |--------|------|--------|
586
- | Naver Chart API | KR OHLCV (adjusted prices) | `fchart.stock.naver.com` โ€” 1 request per stock, max 6000 days |
587
- | Yahoo Finance v8 | US/Global OHLCV | `query2.finance.yahoo.com/v8/finance/chart` โ€” public chart API |
588
- | ECOS (Bank of Korea) | KR macro indicators | Official API with user's own key |
589
- | FRED (St. Louis Fed) | US macro indicators | Official API with user's own key |
590
- | Naver Mobile API | Consensus, flow, sector PER | `m.stock.naver.com/api` โ€” JSON endpoints |
591
- | FMP | Fallback for US history | Financial Modeling Prep API (optional) |
592
-
593
- **Safety infrastructure:**
594
-
595
- - **Rate limiting** โ€” per-domain RPM caps (Naver 30, ECOS 30, FRED 120) with async queue
596
- - **Circuit breaker** โ€” 3 consecutive failures โ†’ source disabled for 60s, half-open retry
597
- - **Fallback chains** โ€” KR: naver โ†’ yahoo_direct โ†’ yahoo / US: yahoo_direct โ†’ fmp โ†’ yahoo
598
- - **Stale-while-revalidate** โ€” returns cached data on failure, warns via `log.warning`
599
- - **User-Agent rotation** โ€” randomized per request to avoid fingerprinting
600
- - **No silent failures** โ€” all API errors logged at warning level, never swallowed
601
- - **No scraping** โ€” all sources are public APIs or official data endpoints
602
-
603
- ### Cross-Border Analysis (beta)
604
-
605
- > **Beta** โ€” API may change after a warning. See [stability](docs/stability.md).
606
-
607
- ```python
608
- c = dartlab.Company("005930")
609
-
610
- # keyword frequency across disclosure periods
611
- c.keywordTrend(keyword="AI") # topic ร— period ร— keyword count
612
- c.keywordTrend() # all 54 built-in keywords
613
-
614
- # news headlines
615
- c.news() # recent 30 days
616
- dartlab.news("AAPL", market="US") # US company news
617
-
618
- # global peer mapping (WICS โ†’ GICS sector)
619
- dartlab.crossBorderPeers("005930") # โ†’ ["AAPL", "MSFT", "NVDA", "TSM", "AVGO"]
620
-
621
- # currency conversion (FRED-based)
622
- from dartlab.engines.common.finance import getExchangeRate, convertValue
623
- getExchangeRate("KRW") # KRW/USD rate
624
- convertValue(1_000_000, "KRW", "USD") # โ†’ ~730.0
625
-
626
- # audit opinion normalization (KR/EN/JP โ†’ canonical code)
627
- from dartlab.engines.common.audit import normalizeAuditOpinion
628
- normalizeAuditOpinion("์ ์ •") # โ†’ "unqualified"
629
- normalizeAuditOpinion("Qualified") # โ†’ "qualified"
630
- ```
631
-
632
- Disclosure gap detection runs automatically inside `c.insights` — it flags mismatches between text changes and financial health (e.g. risk text surges while financials are stable).
633
-
634
- ### Export (experimental)
635
-
636
- > **Experimental** โ€” Breaking changes possible. Not for production.
637
-
638
- ```bash
639
- dartlab excel "005930" -o samsung.xlsx
640
- ```
641
-
642
- Install: `uv add "dartlab[ai]"` (Excel export is included in the AI extras).
643
-
644
- ### Plugins
645
-
646
- ```python
647
- dartlab.plugins() # list loaded plugins
648
- dartlab.reload_plugins() # rescan after installing a plugin
649
- ```
650
-
651
- Plugins can extend DartLab with custom data sources, tools, or analysis engines. See `dartlab plugin create --help` for scaffolding.
652
-
653
- ## EDGAR (US)
654
-
655
- Same `Company` interface, same account standardization pipeline, different data source. EDGAR data is auto-fetched from the SEC API — no pre-download needed:
656
-
657
- ```python
658
- us = dartlab.Company("AAPL")
659
-
660
- us.sections # 10-K/10-Q sections with heading/body
661
- us.show("business") # business description
662
- us.show("10-K::item1ARiskFactors") # risk factors
663
- us.BS # SEC XBRL balance sheet
664
- us.ratios # same 47 ratios
665
- us.diff("10-K::item7Mdna") # MD&A text changes
666
- us.insights # 10-area grades (A~F)
667
-
668
- # analyst functions โ€” auto-detect USD
669
- dartlab.valuation("AAPL") # DCF + DDM + relative (USD)
670
- dartlab.forecast("AAPL") # revenue forecast (USD)
671
- dartlab.simulation("AAPL") # scenario simulation (US macro presets)
672
- ```
673
-
674
- The interface is identical โ€” same methods, same structure:
675
-
676
- ```python
677
- # Korea (DART) # US (EDGAR)
678
- c = dartlab.Company("005930") c = dartlab.Company("AAPL")
679
- c.sections c.sections
680
- c.show("businessOverview") c.show("business")
681
- c.BS c.BS
682
- c.ratios c.ratios
683
- c.diff("businessOverview") c.diff("10-K::item7Mdna")
684
- c.insights.grades() c.insights.grades()
685
- ```
686
-
687
- ### DART vs EDGAR Namespaces
688
-
689
- | | DART | EDGAR |
690
- |---------------|:--------------:|:--------------:|
691
- | `docs` | โœ“ | โœ“ |
692
- | `finance` | โœ“ | โœ“ |
693
- | `report` | โœ“ (28 API types) | โœ— (not applicable) |
694
- | `profile` | โœ“ | โœ“ |
695
-
696
- DART has a `report` namespace with 28 structured disclosure APIs (dividend, governance, executive compensation, etc.). This does not exist in EDGAR โ€” SEC filings are structured differently.
697
-
698
- **EDGAR topic naming**: Topics use `{formType}::{itemId}` format. Short aliases also work:
699
-
700
- ```python
701
- us.show("10-K::item1Business") # full form
702
- us.show("business") # short alias
703
- us.show("risk") # โ†’ 10-K::item1ARiskFactors
704
- us.show("mdna") # โ†’ 10-K::item7Mdna
705
- ```
706
-
707
- ## AI Analysis
708
-
709
- > **Experimental** — the AI analysis layer and `analysis/` engines are under active development. APIs, output formats, and available tools may change between versions.
710
-
711
- > **Tip:** New to financial analysis or prefer natural language? Use `dartlab.ask()` — the AI assistant handles everything from data download to analysis. No coding knowledge required.
712
-
713
- DartLab includes a built-in AI analysis layer that feeds structured company data to LLMs. **No code required** — you can ask questions in plain language and DartLab handles everything: data selection, context assembly, and streaming the answer.
714
-
715
- ```bash
716
- # terminal one-liner โ€” no Python needed
717
- dartlab ask "삼성전자 재무건전성 분석해줘"
718
- ```
719
-
720
- DartLab structures the data, selects relevant context (financials, insights, sector benchmarks), and lets the LLM explain:
721
-
722
- ```
723
- $ dartlab ask "삼성전자 재무건전성 분석해줘"
724
-
725
- 삼성전자의 재무건전성은 A등급입니다.
726
-
727
- ▸ 부채비율 31.8% — 업종 평균(45.2%) 대비 양호
728
- ▸ 유동비율 258.6% — 200% 안전 기준 상회
729
- ▸ 이자보상배수 22.1배 — 이자 부담 매우 낮음
730
- ▸ ROE 회복세: 1.6% → 10.2% (4분기 연속 개선)
731
-
732
- [데이터 출처: 2024Q4 사업보고서, dartlab insights 엔진]
733
- ```
734
-
735
- For real-time market-wide disclosure questions (e.g. "최근 7일 수주공시 알려줘"), the AI uses your `OpenDART API key` to search recent filings directly. Store the key in project `.env` or via UI Settings.
736
-
737
- The 2-tier architecture means basic analysis works with any provider, while tool-calling providers (OpenAI, Claude) can go deeper by requesting additional data mid-conversation.
738
-
739
- ### Python API
740
-
741
- ```python
742
- import dartlab
743
-
744
- # streams to stdout, returns full text
745
- answer = dartlab.ask("์‚ผ์„ฑ์ „์ž ์žฌ๋ฌด๊ฑด์ „์„ฑ ๋ถ„์„ํ•ด์ค˜")
746
-
747
- # provider + model override
748
- answer = dartlab.ask("์‚ผ์„ฑ์ „์ž ๋ถ„์„", provider="openai", model="gpt-4o")
749
-
750
- # data filtering
751
- answer = dartlab.ask("์‚ผ์„ฑ์ „์ž ํ•ต์‹ฌ ํฌ์ธํŠธ", include=["BS", "IS"])
752
-
753
- # analysis pattern (framework-guided)
754
- answer = dartlab.ask("์‚ผ์„ฑ์ „์ž ๋ถ„์„", pattern="financial")
755
-
756
- # agent mode โ€” LLM selects tools for deeper analysis
757
- answer = dartlab.chat("005930", "๋ฐฐ๋‹น ์ถ”์„ธ๋ฅผ ๋ถ„์„ํ•˜๊ณ  ์ด์ƒ ์ง•ํ›„๋ฅผ ์ฐพ์•„์ค˜")
758
- ```
759
-
760
- ### CLI
761
-
762
- ```bash
763
- # provider setup โ€” free providers first
764
- dartlab setup # list all providers
765
- dartlab setup gemini # Google Gemini (free)
766
- dartlab setup groq # Groq (free)
767
-
768
- # status
769
- dartlab status # all providers (table view)
770
- dartlab status --cost # cumulative token/cost stats
771
-
772
- # ask questions (streaming by default)
773
- dartlab ask "์‚ผ์„ฑ์ „์ž ์žฌ๋ฌด๊ฑด์ „์„ฑ ๋ถ„์„ํ•ด์ค˜"
774
- dartlab ask "AAPL risk analysis" -p ollama
775
- dartlab ask --continue "๋ฐฐ๋‹น ์ถ”์„ธ๋Š”?"
776
-
777
- # auto-generate report
778
- dartlab report "์‚ผ์„ฑ์ „์ž" -o report.md
779
-
780
- # web UI
781
- dartlab # open browser UI
782
- dartlab --help # show all commands
783
- ```
784
-
785
- <details>
786
- <summary>All CLI commands (16)</summary>
787
-
788
- | Category | Command | Description |
789
- |----------|---------|-------------|
790
- | Data | `show` | Open any topic by name |
791
- | Data | `search` | Find companies by name or code |
792
- | Data | `statement` | BS / IS / CF / SCE output |
793
- | Data | `sections` | Raw docs sections |
794
- | Data | `profile` | Company index and facts |
795
- | Data | `modules` | List all available modules |
796
- | AI | `ask` | Natural language question |
797
- | AI | `report` | Auto-generate analysis report |
798
- | Export | `excel` | Export to Excel (experimental) |
799
- | Collect | `collect` | Download / refresh / batch collect |
800
- | Collect | `collect --check` | Check freshness (new filings) |
801
- | Collect | `collect --incremental` | Incremental collect (missing only) |
802
- | Server | `ai` | Launch web UI (localhost:8400) |
803
- | Server | `share` | Tunnel sharing (ngrok / cloudflared) |
804
- | Server | `status` | Provider connection status |
805
- | Server | `setup` | Provider setup wizard |
806
- | MCP | `mcp` | Start MCP stdio server |
807
- | Plugin | `plugin` | Create / list plugins |
808
-
809
- </details>
810
-
811
- ### Providers
812
-
813
- **Free API key providers** โ€” sign up, paste the key, start analyzing:
814
-
815
- | Provider | Free Tier | Model | Setup |
816
- |----------|-----------|-------|-------|
817
- | `gemini` | Gemini 2.5 Pro/Flash free | Gemini 2.5 | `dartlab setup gemini` |
818
- | `groq` | 6Kโ€“30K TPM free | LLaMA 3.3 70B | `dartlab setup groq` |
819
- | `cerebras` | 1M tokens/day permanent | LLaMA 3.3 70B | `dartlab setup cerebras` |
820
- | `mistral` | 1B tokens/month free | Mistral Small | `dartlab setup mistral` |
821
-
822
- **Other providers:**
823
-
824
- | Provider | Auth | Cost | Tool Calling |
825
- |----------|------|------|:---:|
826
- | `oauth-codex` | ChatGPT subscription (Plus/Team/Enterprise) | Included in subscription | Yes |
827
- | `openai` | API key (`OPENAI_API_KEY`) | Pay-per-token | Yes |
828
- | `ollama` | Local install, no account needed | Free | Depends on model |
829
- | `codex` | Codex CLI installed locally | Free (uses your Codex session) | Yes |
830
- | `custom` | Any OpenAI-compatible endpoint | Varies | Varies |
831
-
832
- **Auto-fallback:** Set multiple free API keys and DartLab automatically switches to the next provider when one hits its rate limit. Use `provider="free"` to enable the fallback chain:
833
-
834
- ```python
835
- dartlab.ask("์‚ผ์„ฑ์ „์ž ๋ถ„์„", provider="free")
836
- ```
837
-
838
- **Why no Claude provider?** Anthropic does not offer OAuth-based access. Without OAuth, there is no way to let users authenticate with their existing subscription — we would have to ask users to paste API keys, which goes against DartLab's frictionless design. If Anthropic adds OAuth support in the future, we will add a Claude provider. For now, Claude works through **MCP** (see below) — Claude Desktop, Claude Code, and Cursor can call DartLab's 60 tools directly.
839
-
840
- **`oauth-codex`** is the recommended provider — if you have a ChatGPT subscription, it works out of the box with no API keys. Run `dartlab setup oauth-codex` to authenticate.
841
-
842
- **Web UI (`dartlab`)** launches a browser-based chat interface for interactive analysis. This feature is currently **experimental** — we are evaluating the right scope and UX for visualization and collaborative features.
843
-
844
- Install AI dependencies: `uv add "dartlab[ai]"`
845
-
846
- ### Project Settings (`.dartlab.yml`)
847
-
848
- ```yaml
849
- company: 005930 # default company
850
- provider: openai # default LLM provider
851
- model: gpt-4o # default model
852
- verbose: false
853
- ```
854
-
855
- ## MCP โ€” AI Assistant Integration
856
-
857
- DartLab includes a built-in [MCP](https://modelcontextprotocol.io/) server that exposes 60 tools (16 global + 44 per-company) to Claude Desktop, Claude Code, Cursor, and any MCP-compatible client.
858
-
859
- ```bash
860
- uv add "dartlab[mcp]"
861
- ```
862
-
863
- ### Claude Desktop
864
-
865
- Add to `claude_desktop_config.json`:
866
-
867
- ```json
868
- {
869
- "mcpServers": {
870
- "dartlab": {
871
- "command": "uv",
872
- "args": ["run", "dartlab", "mcp"]
873
- }
874
- }
875
- }
876
- ```
877
-
878
- ### Claude Code
879
-
880
- ```bash
881
- claude mcp add dartlab -- uv run dartlab mcp
882
- ```
883
-
884
- Or add to `~/.claude/settings.json`:
885
-
886
- ```json
887
- {
888
- "mcpServers": {
889
- "dartlab": {
890
- "command": "uv",
891
- "args": ["run", "dartlab", "mcp"]
892
- }
893
- }
894
- }
895
- ```
896
-
897
- ### Cursor
898
-
899
- Add to `.cursor/mcp.json` with the same config format as Claude Desktop.
900
-
901
- ### What's Available
902
-
903
- Once connected, your AI assistant can:
904
-
905
- - **Search** โ€” find companies by name or code (`search_company`)
906
- - **Show** โ€” read any disclosure topic (`show_topic`, `list_topics`, `diff_topic`)
907
- - **Finance** โ€” balance sheet, income statement, cash flow, ratios (`get_financial_statements`, `get_ratios`)
908
- - **Analysis** โ€” insights, sector ranking, valuation (`get_insight`, `get_ranking`)
909
- - **EDGAR** โ€” same tools work for US companies (`stock_code: "AAPL"`)
910
-
911
- Auto-generate config for your platform:
912
-
913
- ```bash
914
- dartlab mcp --config claude-desktop
915
- dartlab mcp --config claude-code
916
- dartlab mcp --config cursor
917
- ```
918
-
919
- ## OpenAPI โ€” Raw Public APIs
920
-
921
- Use source-native wrappers when you want raw disclosure APIs directly.
922
-
923
- ### OpenDart (Korea)
924
-
925
- > **Note:** `Company` does **not** require an API key — it uses pre-built datasets.
926
- > `OpenDart` uses the raw DART API and requires a key from [opendart.fss.or.kr](https://opendart.fss.or.kr) (free).
927
- > Recent filing-list AI questions across the whole market also use this key. In the UI, open Settings and manage `OpenDART API key` there.
928
-
929
- ```python
930
- from dartlab import OpenDart
931
-
932
- d = OpenDart()
933
- d.search("์นด์นด์˜ค", listed=True)
934
- d.filings("์‚ผ์„ฑ์ „์ž", "2024")
935
- d.finstate("์‚ผ์„ฑ์ „์ž", 2024)
936
- d.report("์‚ผ์„ฑ์ „์ž", "๋ฐฐ๋‹น", 2024)
937
- ```
938
-
939
- ### OpenEdgar (US)
940
-
941
- > **No API key required.** SEC EDGAR is a public API — no registration needed.
942
-
943
- ```python
944
- from dartlab import OpenEdgar
945
-
946
- e = OpenEdgar()
947
- e.search("Apple")
948
- e.filings("AAPL", forms=["10-K", "10-Q"])
949
- e.companyFactsJson("AAPL")
950
- ```
951
-
952
- ## Data
953
-
954
- **No manual setup required.** When you create a `Company`, dartlab automatically downloads the required data.
955
-
956
- | Dataset | Coverage | Size | Source |
957
- |---------|----------|------|--------|
958
- | DART docs | 2,500+ companies | ~8 GB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/docs) |
959
- | DART finance | 2,700+ companies | ~600 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/finance) |
960
- | DART report | 2,700+ companies | ~320 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/report) |
961
- | EDGAR | On-demand | โ€” | SEC API (auto-fetched) |
962
-
963
- ### 3-Step Data Pipeline
964
-
965
- ```
966
- dartlab.Company("005930")
967
- │
968
- ├─ 1. Local cache ──── already have it? done (instant)
969
- │
970
- ├─ 2. HuggingFace ──── auto-download (~seconds, no key needed)
971
- │
972
- └─ 3. DART API ──────── collect with your API key (needs key)
973
- ```
974
-
975
- If a company is not in HuggingFace, dartlab collects data directly from DART — this requires an API key:
976
-
977
- ```bash
978
- dartlab setup dart-key
979
- ```
980
-
981
- ### Freshness โ€” Automatic Update Detection
982
-
983
- DartLab uses a 3-layer freshness system to keep your local data current:
984
-
985
- | Layer | Method | Cost |
986
- |-------|--------|------|
987
- | L1 | HTTP HEAD → ETag comparison with HuggingFace | ~0.5s, few hundred bytes |
988
- | L2 | Local file age (90-day TTL fallback) | instant (local) |
989
- | L3 | DART API → `rcept_no` diff (requires API key) | 1 API call, ~1s |
990
-
991
- When you open a `Company`, dartlab checks if newer data exists. If a new disclosure was filed:
992
-
993
- ```python
994
- c = dartlab.Company("005930")
995
- # [dartlab] ⚠ 005930 — 새 공시 2건 발견 (사업보고서 (2024.12))
996
- # • 증분 수집: dartlab collect --incremental 005930
997
- # • 또는 Python: c.update()
998
-
999
- c.update() # incremental collect — only missing filings
1000
- ```
1001
-
1002
- ```bash
1003
- # CLI freshness check
1004
- dartlab collect --check 005930 # single company
1005
- dartlab collect --check # scan all local companies (7 days)
1006
-
1007
- # incremental collect โ€” only missing filings
1008
- dartlab collect --incremental 005930 # single company
1009
- dartlab collect --incremental # all local companies with new filings
1010
- ```
1011
-
1012
- ### Batch Collection (DART API)
1013
-
1014
- ```bash
1015
- dartlab collect --batch # all listed, missing only
1016
- dartlab collect --batch -c finance 005930 # specific category + company
1017
- dartlab collect --batch --mode all # re-collect everything
1018
- ```
1019
-
1020
- ## Try It Now
1021
-
1022
- ### Live Demo (No Install)
1023
-
1024
- Try DartLab instantly โ€” no Python, no terminal, no setup:
1025
-
1026
- **[โ†’ Open Live Demo](https://huggingface.co/spaces/eddmpython/dartlab)** โ€” enter a stock code, see financials immediately
1027
-
1028
- Or open a [Colab notebook](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) in your browser.
1029
-
1030
- ### Marimo Notebooks
1031
-
1032
- > Data is automatically downloaded on first use. No setup required unless collecting new companies directly from DART.
1033
-
1034
- ```bash
1035
- uv add dartlab marimo
1036
- marimo edit notebooks/marimo/dartCompany.py # Korean company (DART)
1037
- marimo edit notebooks/marimo/edgarCompany.py # US company (EDGAR)
1038
- marimo edit notebooks/marimo/aiAnalysis.py # AI analysis examples
1039
- ```
1040
-
1041
- ### Colab Notebooks
1042
-
1043
- **Showcase** (English โ€” global audience):
1044
-
1045
- | Notebook | Topic |
1046
- |---|---|
1047
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) | **Quick Start** โ€” analyze any company in 3 lines |
1048
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/02_financial_analysis.ipynb) | **Financial Analysis** โ€” statements, time series, ratios |
1049
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/03_kr_us_compare.ipynb) | **Korea vs US** โ€” Samsung vs Apple side-by-side |
1050
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/04_risk_diff.ipynb) | **Risk Diff** โ€” track disclosure changes (Bloomberg can't) |
1051
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/05_sector_screening.ipynb) | **Sector Screening** โ€” 8 presets, sector benchmarks |
1052
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/06_insight_anomaly.ipynb) | **Insight & Anomaly** โ€” 10-area grading, 6 anomaly rules |
1053
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/07_network_governance.ipynb) | **Network & Governance** โ€” corporate relationship graph |
1054
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/08_signal_trend.ipynb) | **Signal Trends** โ€” 48-keyword disclosure monitoring |
1055
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/09_ai_analysis.ipynb) | **AI Analysis** โ€” `dartlab.ask()` with 9 LLM providers |
1056
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/10_disclosure_deep_dive.ipynb) | **Disclosure Deep Dive** โ€” sections architecture |
1057
-
1058
- <details>
1059
- <summary>ํ•œ๊ตญ์–ด Tutorials</summary>
1060
-
1061
- | Notebook | Topic |
1062
- |---|---|
1063
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/getting-started/quickstart.ipynb) | **๋น ๋ฅธ ์‹œ์ž‘** โ€” sections, show, trace, diff |
1064
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/02_financial_statements.ipynb) | **์žฌ๋ฌด์ œํ‘œ** โ€” BS, IS, CF |
1065
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/04_ratios.ipynb) | **์žฌ๋ฌด๋น„์œจ** โ€” 47๊ฐœ ๋น„์œจ |
1066
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/06_disclosure.ipynb) | **๊ณต์‹œ ํ…์ŠคํŠธ** โ€” sections ํŒŒ์‹ฑ |
1067
- | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/09_edgar.ipynb) | **EDGAR** โ€” ๋ฏธ๊ตญ SEC |
1068
-
1069
- </details>
1070
-
1071
- ## Documentation
1072
-
1073
- - Docs: https://eddmpython.github.io/dartlab/
1074
- - Sections guide: https://eddmpython.github.io/dartlab/docs/getting-started/sections
1075
- - Quick start: https://eddmpython.github.io/dartlab/docs/getting-started/quickstart
1076
- - API overview: https://eddmpython.github.io/dartlab/docs/api/overview
1077
- - Beginner guide (Korean): https://eddmpython.github.io/dartlab/blog/dartlab-easy-start/
1078
-
1079
- ### Blog
1080
-
1081
- The [DartLab Blog](https://eddmpython.github.io/dartlab/blog/) covers practical disclosure analysis — how to read reports, interpret patterns, and spot risk signals. 120+ articles across three categories:
1082
-
1083
- - **Disclosure Systems** — structure and mechanics of DART/EDGAR filings
1084
- - **Report Reading** — practical guide to audit reports, preliminary earnings, restatements
1085
- - **Financial Interpretation** — financial statements, ratios, and disclosure signals
1086
-
1087
- ## Stability
1088
-
1089
- | Tier | Scope |
1090
- |------|-------|
1091
- | **Stable** | DART Company (sections, show, trace, diff, BS/IS/CF, CIS, index, filings, profile), EDGAR Company core, valuation, forecast, simulation |
1092
- | **Beta** | EDGAR power-user (SCE, notes, freq, coverage), insights, distress, ratios, timeseries, network, governance, workforce, capital, debt, chart/table/text tools, ask/chat, OpenDart, OpenEdgar, Server API, MCP, CLI subcommands |
1093
- | **Experimental** | AI tool calling, export |
1094
- | **Alpha** | Desktop App (Windows .exe) — functional but incomplete; Sections Viewer — not yet fully structured |
1095
-
1096
- See [docs/stability.md](docs/stability.md).
1097
-
1098
- ## Contributing
1099
-
1100
- The project prefers **experiments before engine changes**. If you want to propose a parser or mapping change, validate it in `experiments/` first and bring the verified result back into the engine.
1101
-
1102
- - **Experiment folder**: `experiments/XXX_camelCaseName/` — each file must be independently runnable with actual results in its docstring
1103
- - **Data contributions** (e.g. `accountMappings.json`, `sectionMappings.json`): only accepted when backed by experiment evidence — no manual bulk edits
1104
- - Issues and PRs in Korean or English are both welcome
1105
-
1106
- ## License
1107
-
1108
- MIT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml CHANGED
@@ -59,6 +59,7 @@ dependencies = [
59
  "orjson>=3.10.0,<4",
60
  "polars>=1.0.0,<2",
61
  "requests>=2.32.5,<3",
 
62
  "rich>=14.3.3,<15",
63
  "plotly>=5.0.0,<6",
64
  "mcp[cli]>=1.0",
 
59
  "orjson>=3.10.0,<4",
60
  "polars>=1.0.0,<2",
61
  "requests>=2.32.5,<3",
62
+ "prompt-toolkit>=3.0,<4",
63
  "rich>=14.3.3,<15",
64
  "plotly>=5.0.0,<6",
65
  "mcp[cli]>=1.0",
src/dartlab/ai/DEV.md CHANGED
@@ -1,5 +1,77 @@
1
  # AI Engine Development Guide
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ## Source Of Truth
4
 
5
  - ๋ฐ์ดํ„ฐ source-of-truth: `src/dartlab/core/registry.py`
 
1
  # AI Engine Development Guide
2
 
3
+ ## ์„ค๊ณ„ ์‚ฌ์ƒ
4
+
5
+ ### dartlab AI๋Š” ๋ฌด์—‡์ธ๊ฐ€
6
+
7
+ dartlab์˜ ํ•ต์‹ฌ ์ž์‚ฐ์€ ๋ฐ์ดํ„ฐ ์—”์ง„์ด๋‹ค. ์ „์ž๊ณต์‹œ ์›๋ณธ์„ ์ •๊ทœํ™”ํ•˜์—ฌ **์ „๊ธฐ๊ฐ„ ๋น„๊ต๊ฐ€๋Šฅ + ๊ธฐ์—…๊ฐ„ ๋น„๊ต๊ฐ€๋Šฅ**ํ•œ ๊ตฌ์กฐ๋กœ ๋งŒ๋“  ๊ฒƒ์ด dartlab์˜ ์กด์žฌ ์ด์œ ๋‹ค. AI๋Š” ์ด ๋ฐ์ดํ„ฐ ์œ„์—์„œ ๋™์ž‘ํ•˜๋Š” **์†Œ๋น„์ž**์ด์ง€, ๋ฐ์ดํ„ฐ๋ฅผ ๋Œ€์ฒดํ•˜์ง€ ์•Š๋Š”๋‹ค.
8
+
9
+ **LLM์€ ํ•ด์„์ž์ด์ง€ ๋ถ„์„๊ฐ€๊ฐ€ ์•„๋‹ˆ๋‹ค.**
10
+ - ๊ณ„์‚ฐ์€ ์—”์ง„์ด ํ•œ๋‹ค (ratios, timeseries, insights, valuation)
11
+ - ํŒ๋‹จ์€ ์—”์ง„์ด ํ•œ๋‹ค (anomaly detection, scoring, red flags)
12
+ - LLM์€ ์—”์ง„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ›์•„์„œ **"์™œ"๋ฅผ ์„ค๋ช…ํ•˜๊ณ , ์ธ๊ณผ ๊ด€๊ณ„๋ฅผ ์„œ์ˆ ํ•˜๊ณ , ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์— ๋‹ตํ•œ๋‹ค**
13
+
14
+ ์ด๊ฒƒ์ด dexter์™€์˜ ๊ทผ๋ณธ์  ์ฐจ์ด๋‹ค:
15
+ - dexter: ๋ฐ์ดํ„ฐ ์—†์Œ. LLM์ด ์™ธ๋ถ€ API๋ฅผ ํ˜ธ์ถœํ•ด์„œ ๋ฐ์ดํ„ฐ๋ฅผ ์ˆ˜์ง‘ํ•˜๊ณ  ๋ถ„์„. LLM์ด ์ „๋ถ€.
16
+ - dartlab: ๋ฐ์ดํ„ฐ ์—”์ง„์ด ์ „๋ถ€. LLM์€ ์ •๊ทœํ™”๋œ ๋ฐ์ดํ„ฐ๋ฅผ ์ฝ๊ณ  ํ•ด์„ํ•˜๋Š” ๋งˆ์ง€๋ง‰ ๊ณ„์ธต.
17
+
18
+ ### 2-Tier ์•„ํ‚คํ…์ฒ˜
19
+
20
+ - **Tier 1 (์‹œ์Šคํ…œ ์ฃผ๋„)**: ์งˆ๋ฌธ ๋ถ„๋ฅ˜ โ†’ ์—”์ง„ ๊ณ„์‚ฐ โ†’ ๊ฒฐ๊ณผ๋ฅผ ์ปจํ…์ŠคํŠธ๋กœ ์กฐ๋ฆฝ โ†’ LLM์— ํ•œ ๋ฒˆ ์ „๋‹ฌ. ๋ชจ๋“  provider์—์„œ ๋™์ž‘. tool calling ๋ถˆํ•„์š”.
21
+ - **Tier 2 (LLM ์ฃผ๋„)**: Tier 1 ๊ฒฐ๊ณผ๋ฅผ ๋ณด๊ณ  LLM์ด "๋ถ€์กฑํ•˜๋‹ค" ํŒ๋‹จ โ†’ ๋„๊ตฌ ํ˜ธ์ถœ๋กœ ์ถ”๊ฐ€ ํƒ์ƒ‰. tool calling ๊ฐ€๋Šฅํ•œ provider์—์„œ๋งŒ ๋™์ž‘.
22
+
23
+ Tier 1์ด ์ถฉ๋ถ„ํ•˜๋ฉด LLM roundtrip์€ 1ํšŒ๋‹ค. ์ด๊ฒƒ์ด ์†๋„์˜ ํ•ต์‹ฌ์ด๋‹ค.
24
+
25
+ ### ์†๋„ ์›์น™
26
+
27
+ **LLM roundtrip์„ ์ค„์ด๋Š” ๊ฒƒ์ด ์†๋„๋‹ค.**
28
+ - ๋” ๋งŽ์€ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฏธ๋ฆฌ ์กฐ๋ฆฝํ•ด์„œ 1ํšŒ์— ๋๋‚ด๋Š” ๊ฒƒ์ด ๋น ๋ฅด๋‹ค (Tier 1 ๊ฐ•ํ™”)
29
+ - ๋„๊ตฌ ํ˜ธ์ถœ์„ ๋ณ‘๋ ฌํ™”ํ•˜๋Š” ๊ฒƒ๋ณด๋‹ค, ์• ์ดˆ์— ํ˜ธ์ถœ์ด ํ•„์š” ์—†๊ฒŒ ๋งŒ๋“œ๋Š” ๊ฒƒ์ด ๋น ๋ฅด๋‹ค
30
+ - changes(๊ณต์‹œ ๋ณ€ํ™”๋ถ„ 23%)๋ฅผ ์ปจํ…์ŠคํŠธ์— ๋ฏธ๋ฆฌ ๋„ฃ์œผ๋ฉด "๋ญ๊ฐ€ ๋ฐ”๋€Œ์—ˆ์ง€?" ํƒ์ƒ‰ ํ˜ธ์ถœ์ด ์‚ฌ๋ผ์ง„๋‹ค
31
+
32
+ ### dexter์—์„œ ํก์ˆ˜ํ•œ ๊ฒƒ
33
+
34
+ | ํŒจํ„ด | dexter ์›๋ณธ | dartlab ์ ์šฉ |
35
+ |------|------------|-------------|
36
+ | Scratchpad | ๋„๊ตฌ ๊ฒฐ๊ณผ ๋ˆ„์ /ํ† ํฐ ๊ด€๋ฆฌ | `runtime/scratchpad.py` โ€” ์ค‘๋ณต ํ˜ธ์ถœ ๋ฐฉ์ง€, ํ† ํฐ ์˜ˆ์‚ฐ |
37
+ | SOUL.md | ๋ถ„์„ ์ฒ ํ•™ ์ฃผ์ž… | `templates/analysisPhilosophy.py` โ€” Palepu-Healy + CFA ์‚ฌ๊ณ  ํ”„๋ ˆ์ž„ |
38
+ | stripFieldsDeep | ๋„๊ตฌ ๊ฒฐ๊ณผ ํ•„๋“œ ์ œ๊ฑฐ | `context/pruning.py` โ€” XBRL ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์žฌ๊ท€ ์ œ๊ฑฐ |
39
+ | SKILL.md | ์›Œํฌํ”Œ๋กœ์šฐ ๊ฐ€์ด๋“œ | `skills/catalog.py` โ€” 8๊ฐœ ๋ถ„์„ ์Šคํ‚ฌ (๋„๊ตฌ ๋น„์˜์กด) |
40
+ | ์ž์œจ ์—์ด์ „ํŠธ | ์ถฉ๋ถ„ํ•  ๋•Œ๊นŒ์ง€ ํƒ์ƒ‰ | `agentLoopAutonomous()` โ€” report_mode Tier 2 |
41
+ | ์„ธ์…˜ ๋ฉ”๋ชจ๋ฆฌ | SQLite + ์‹œ๊ฐ„ ๊ฐ์‡  | `memory/store.py` โ€” ๋ถ„์„ ๊ธฐ๋ก ์˜์† |
42
+
43
+ ### ํก์ˆ˜ํ•˜์ง€ ์•Š์€ ๊ฒƒ
44
+
45
+ - **๋ฐ์ดํ„ฐ ์†Œ์œ  ๊ตฌ์กฐ**: dexter๋Š” ์™ธ๋ถ€ API๋กœ ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘. dartlab์€ ์ด๋ฏธ ๋ฐ์ดํ„ฐ ์—”์ง„์„ ์†Œ์œ .
46
+ - **๋‹จ์ผ ๋ชจ๋ธ ์˜์กด**: dexter๋Š” ๋ชจ๋“  ํŒ๋‹จ์„ LLM์— ์œ„์ž„. dartlab์€ ์—”์ง„์ด ๊ณ„์‚ฐ/ํŒ๋‹จํ•˜๊ณ  LLM์€ ํ•ด์„๋งŒ.
47
+ - **meta-tool ํŒจํ„ด**: ๋„๊ตฌ ์•ˆ์— ๋„๊ตฌ๋ฅผ ๋„ฃ๋Š” ๊ตฌ์กฐ. dartlab์€ Super Tool 7๊ฐœ๋กœ ์ด๋ฏธ ํ•ด๊ฒฐ.
48
+
49
+ ### ์‚ฌ์šฉ์ž ์›์น™
50
+
51
+ - **์ ‘๊ทผ์„ฑ**: ์ข…๋ชฉ์ฝ”๋“œ ํ•˜๋‚˜๋ฉด ๋. `dartlab ask "005930" "์˜์—…์ด์ต๋ฅ  ์ถ”์„ธ๋Š”?"` ๋˜๋Š” `dartlab chat`์œผ๋กœ ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ.
52
+ - **์‹ ๋ขฐ์„ฑ**: ์ˆซ์ž๋Š” ์—”์ง„์ด ๊ณ„์‚ฐํ•œ ์›๋ณธ. LLM์ด ์ˆซ์ž๋ฅผ ๋งŒ๋“ค์–ด๋‚ด๋ฉด ๊ฒ€์ฆ ๋ ˆ์ด์–ด๊ฐ€ ์žก๋Š”๋‹ค.
53
+ - **ํˆฌ๋ช…์„ฑ**: ์–ด๋–ค ๋ฐ์ดํ„ฐ๋ฅผ ๋ดค๋Š”์ง€(includedEvidence), ์–ด๋–ค ๋„๊ตฌ๋ฅผ ์ผ๋Š”์ง€(tool_call) ํ•ญ์ƒ ๋…ธ์ถœ.
54
+
55
+ ### ํ’ˆ์งˆ ๊ฒ€์ฆ ๊ธฐ์ค€์„  (2026-03-27)
56
+
57
+ ollama qwen3:4b ๊ธฐ์ค€ critical+high 35๊ฑด ๋ฐฐ์น˜ ๊ฒฐ๊ณผ:
58
+
59
+ | ์ง€ํ‘œ | ๊ฐ’ | ๋น„๊ณ  |
60
+ |------|-----|------|
61
+ | avgOverall | 7.33 | gemini fallback ์ˆ˜์ • ํ›„ ์žฌ์ธก์ • (์ˆ˜์ • ์ „ 5.98) |
62
+ | routeMatch | 1.00 | intent ๋ถ„๋ฅ˜ + ๋ผ์šฐํŒ… ์™„๋ฒฝ |
63
+ | moduleUtilization | 0.75 | ์ผ๋ถ€ eval ์ผ€์ด์Šค ์ •ํ•ฉ์„ฑ ๋ฌธ์ œ ํฌํ•จ |
64
+ | falseUnavailable | 0/35 | "๋ฐ์ดํ„ฐ ์—†๋‹ค" ๊ฑฐ์ง“ ์‘๋‹ต ์—†์Œ |
65
+
66
+ production ๋ชจ๋ธ(openai/gemini) ์ธก์ •์€ API ํ‚ค ํ™•๋ณด ํ›„ ์ง„ํ–‰ ์˜ˆ์ •. factual accuracy๋Š” production ๋ชจ๋ธ์—์„œ๋งŒ ์œ ์˜๋ฏธ.
67
+
68
+ ์ฃผ์š” failure taxonomy:
69
+ - **runtime_error**: provider ์„ค์ • ์ •ํ•ฉ์„ฑ (ํ•ด๊ฒฐ๋จ)
70
+ - **retrieval_failure**: eval ์ผ€์ด์Šค expectedModules์™€ ์‹ค์ œ ์ปจํ…์ŠคํŠธ ๋นŒ๋” ๋งคํ•‘ ๊ฐ„๊ทน
71
+ - **generation_failure**: ์†Œํ˜• ๋ชจ๋ธ ํ•œ๊ณ„ (production ๋ชจ๋ธ์—์„œ ์žฌ์ธก์ • ํ•„์š”)
72
+
73
+ ---
74
+
75
  ## Source Of Truth
76
 
77
  - ๋ฐ์ดํ„ฐ source-of-truth: `src/dartlab/core/registry.py`
src/dartlab/ai/context/builder.py CHANGED
@@ -231,8 +231,7 @@ _CANDIDATE_ALIASES = {
231
  }
232
  _MARGIN_DRIVER_MARGIN_HINTS = frozenset({"์˜์—…์ด์ต๋ฅ ", "๋งˆ์ง„", "์ด์ต๋ฅ ", "margin"})
233
  _MARGIN_DRIVER_COST_HINTS = frozenset({"๋น„์šฉ ๊ตฌ์กฐ", "์›๊ฐ€ ๊ตฌ์กฐ", "๋น„์šฉ", "์›๊ฐ€", "ํŒ๊ด€๋น„", "๋งค์ถœ์›๊ฐ€"})
234
- _MARGIN_DRIVER_BUSINESS_HINTS = frozenset({"์‚ฌ์—… ๋ณ€ํ™”", "์‚ฌ์—…๋ณ€ํ™”", "์‚ฌ์—… ๊ตฌ์กฐ", "์‚ฌ์—…๊ตฌ์กฐ"})
235
- _RECENT_DISCLOSURE_BUSINESS_HINTS = frozenset({"์‚ฌ์—… ๋ณ€ํ™”", "์‚ฌ์—…๋ณ€ํ™”", "์‚ฌ์—… ๊ตฌ์กฐ", "์‚ฌ์—…๊ตฌ์กฐ"})
236
  _PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
237
 
238
 
@@ -372,13 +371,13 @@ def _has_margin_driver_pattern(question: str) -> bool:
372
  return (
373
  _question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
374
  and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
375
- and _question_has_any(question, _MARGIN_DRIVER_BUSINESS_HINTS)
376
  )
377
 
378
 
379
  def _has_recent_disclosure_business_pattern(question: str) -> bool:
380
  lowered = question.lower()
381
- return "์ตœ๊ทผ ๊ณต์‹œ" in lowered and _question_has_any(question, _RECENT_DISCLOSURE_BUSINESS_HINTS)
382
 
383
 
384
  def _resolve_direct_hint_modules(question: str) -> list[str]:
@@ -957,6 +956,61 @@ def _build_sections_context(
957
  return result
958
 
959
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
960
  def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
961
  if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
962
  return None
@@ -1203,6 +1257,14 @@ def _build_compact_context_modules_inner(
1203
  if included_name not in included:
1204
  included.append(included_name)
1205
 
 
 
 
 
 
 
 
 
1206
  direct_sections = _build_direct_module_context(
1207
  company,
1208
  candidate_plan.get("direct", []),
 
231
  }
232
  _MARGIN_DRIVER_MARGIN_HINTS = frozenset({"์˜์—…์ด์ต๋ฅ ", "๋งˆ์ง„", "์ด์ต๋ฅ ", "margin"})
233
  _MARGIN_DRIVER_COST_HINTS = frozenset({"๋น„์šฉ ๊ตฌ์กฐ", "์›๊ฐ€ ๊ตฌ์กฐ", "๋น„์šฉ", "์›๊ฐ€", "ํŒ๊ด€๋น„", "๋งค์ถœ์›๊ฐ€"})
234
+ _BUSINESS_CHANGE_HINTS = frozenset({"์‚ฌ์—… ๋ณ€ํ™”", "์‚ฌ์—…๋ณ€ํ™”", "์‚ฌ์—… ๊ตฌ์กฐ", "์‚ฌ์—…๊ตฌ์กฐ"})
 
235
  _PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
236
 
237
 
 
371
  return (
372
  _question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
373
  and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
374
+ and _question_has_any(question, _BUSINESS_CHANGE_HINTS)
375
  )
376
 
377
 
378
  def _has_recent_disclosure_business_pattern(question: str) -> bool:
379
  lowered = question.lower()
380
+ return "์ตœ๊ทผ ๊ณต์‹œ" in lowered and _question_has_any(question, _BUSINESS_CHANGE_HINTS)
381
 
382
 
383
  def _resolve_direct_hint_modules(question: str) -> list[str]:
 
956
  return result
957
 
958
 
959
+ def _build_changes_context(company: Any, *, compact: bool = True) -> str:
960
+ """sections ๋ณ€ํ™” ์š”์•ฝ์„ LLM ์ปจํ…์ŠคํŠธ์šฉ ๋งˆํฌ๋‹ค์šด์œผ๋กœ ๋ณ€ํ™˜.
961
+
962
+ ์ „์ฒด sections(97MB) ๋Œ€์‹  ๋ณ€ํ™”๋ถ„(23%)๋งŒ ์š”์•ฝํ•˜์—ฌ ์ œ๊ณต.
963
+ LLM์ด ์ถ”๊ฐ€ ๋„๊ตฌ ํ˜ธ์ถœ ์—†์ด "๋ฌด์—‡์ด ๋ฐ”๋€Œ์—ˆ๋Š”์ง€" ์ฆ‰์‹œ ํŒŒ์•… ๊ฐ€๋Šฅ.
964
+ """
965
+ docs = getattr(company, "docs", None)
966
+ sections = getattr(docs, "sections", None)
967
+ if sections is None or not hasattr(sections, "changeSummary"):
968
+ return ""
969
+
970
+ try:
971
+ summary = sections.changeSummary(topN=8 if compact else 15)
972
+ except (AttributeError, TypeError, ValueError, pl.exceptions.PolarsError):
973
+ return ""
974
+
975
+ if summary is None or summary.is_empty():
976
+ return ""
977
+
978
+ lines = ["\n## ๊ณต์‹œ ๋ณ€ํ™” ์š”์•ฝ"]
979
+ lines.append("| topic | ๋ณ€ํ™”์œ ํ˜• | ๊ฑด์ˆ˜ | ํ‰๊ท ํฌ๊ธฐ๋ณ€ํ™” |")
980
+ lines.append("|-------|---------|------|------------|")
981
+ for row in summary.iter_rows(named=True):
982
+ topic = row.get("topic", "")
983
+ changeType = row.get("changeType", "")
984
+ count = row.get("count", 0)
985
+ avgDelta = row.get("avgDelta", 0)
986
+ sign = "+" if avgDelta and avgDelta > 0 else ""
987
+ lines.append(f"| {topic} | {changeType} | {count} | {sign}{avgDelta} |")
988
+
989
+ # ์ตœ๊ทผ ๊ธฐ๊ฐ„ ์ฃผ์š” ๋ณ€ํ™” ๋ฏธ๋ฆฌ๋ณด๊ธฐ
990
+ try:
991
+ changes = sections.changes()
992
+ except (AttributeError, TypeError, ValueError, pl.exceptions.PolarsError):
993
+ changes = None
994
+
995
+ if changes is not None and not changes.is_empty():
996
+ # ๊ฐ€์žฅ ์ตœ๊ทผ ๊ธฐ๊ฐ„ ์ „ํ™˜์—์„œ structural/appeared ๋ณ€ํ™”๋งŒ ๋ฐœ์ทŒ
997
+ latestPeriod = changes.get_column("toPeriod").max()
998
+ recent = changes.filter(
999
+ (pl.col("toPeriod") == latestPeriod) & pl.col("changeType").is_in(["structural", "appeared"])
1000
+ )
1001
+ if not recent.is_empty():
1002
+ lines.append(f"\n### ์ตœ๊ทผ ์ฃผ์š” ๋ณ€ํ™” ({latestPeriod})")
1003
+ for row in recent.head(5 if compact else 10).iter_rows(named=True):
1004
+ topic = row.get("topic", "")
1005
+ ct = row.get("changeType", "")
1006
+ preview = row.get("preview", "")
1007
+ if preview:
1008
+ preview = preview[:120] + "..." if len(preview) > 120 else preview
1009
+ lines.append(f"- **{topic}** [{ct}]: {preview}")
1010
+
1011
+ return "\n".join(lines)
1012
+
1013
+
1014
  def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
1015
  if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
1016
  return None
 
1257
  if included_name not in included:
1258
  included.append(included_name)
1259
 
1260
+ # ๋ณ€ํ™” ์ปจํ…์ŠคํŠธ โ€” sections ๋ณ€ํ™”๋ถ„๋งŒ LLM์— ์ „๋‹ฌ (roundtrip ๊ฐ์†Œ)
1261
+ if route in {"sections", "hybrid"}:
1262
+ changes_context = _build_changes_context(company, compact=compact)
1263
+ if changes_context:
1264
+ modules_dict["_changes"] = changes_context
1265
+ if "_changes" not in included:
1266
+ included.append("_changes")
1267
+
1268
  direct_sections = _build_direct_module_context(
1269
  company,
1270
  candidate_plan.get("direct", []),
src/dartlab/ai/context/pruning.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋„๊ตฌ ๊ฒฐ๊ณผ ํ•„๋“œ pruning โ€” LLM์— ๋ถˆํ•„์š”ํ•œ ์ปฌ๋Ÿผ/ํ•„๋“œ ์žฌ๊ท€ ์ œ๊ฑฐ.
2
+
3
+ dexter์˜ stripFieldsDeep ํŒจํ„ด์„ Python์— ์ ์šฉ.
4
+ 토큰 절약 + 분석 관련성 향상.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from typing import Any
11
+
12
+ # LLM ๋ถ„์„์— ๋ถˆํ•„์š”ํ•œ ํ•„๋“œ โ€” ์žฌ๊ท€์ ์œผ๋กœ ์ œ๊ฑฐ
13
+ _STRIP_FIELDS: frozenset[str] = frozenset(
14
+ {
15
+ # XBRL ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
16
+ "concept_id",
17
+ "xbrl_context_id",
18
+ "instant",
19
+ "member",
20
+ "dimension",
21
+ "label_ko_raw",
22
+ # ๊ณต์‹œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
23
+ "acceptance_number",
24
+ "rcept_no",
25
+ "filing_date",
26
+ "report_code",
27
+ "reprt_code",
28
+ "corp_cls",
29
+ "corp_code",
30
+ # ๊ธฐ์ˆ ์  ์‹๋ณ„์ž
31
+ "sj_div",
32
+ "ord",
33
+ "data_rank",
34
+ "source_file",
35
+ "source_path",
36
+ "sourceBlockOrder",
37
+ # ์ค‘๋ณต/๋‚ด๋ถ€์šฉ
38
+ "account_id_raw",
39
+ "account_nm_raw",
40
+ "currency",
41
+ }
42
+ )
43
+
44
+ # ๋ชจ๋“ˆ๋ณ„ ์ถ”๊ฐ€ ์ œ๊ฑฐ ํ•„๋“œ
45
+ _MODULE_STRIP: dict[str, frozenset[str]] = {
46
+ "finance": frozenset({"bsns_year", "sj_nm", "stock_code", "fs_div", "fs_nm"}),
47
+ "explore": frozenset({"blockHash", "rawHtml", "charCount"}),
48
+ "report": frozenset({"rcept_no", "corp_code", "corp_cls"}),
49
+ }
50
+
51
+
52
def pruneToolResult(toolName: str, result: str, *, maxChars: int = 8000) -> str:
    """Remove LLM-irrelevant fields from a tool result string.

    Args:
        toolName: Name of the tool that produced ``result``; selects the extra
            per-category strip set.
        result: Raw tool output. Pruned only when it parses as JSON.
        maxChars: Length cap applied to the returned text.

    Returns:
        ``result`` unchanged when it is empty or shorter than 100 characters;
        the text cut to ``maxChars`` when it is not JSON; otherwise the pruned
        JSON re-serialised with 2-space indentation, with a
        ``"\\n... (pruned+truncated)"`` suffix when it had to be cut.
    """
    # Tiny payloads are not worth parsing.
    if not result or len(result) < 100:
        return result

    try:
        data = json.loads(result)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # Not JSON (e.g. a markdown table): only cap the length.
        return result[:maxChars] if len(result) > maxChars else result

    # Global strip set plus whatever the tool's category adds.
    category = _resolveCategory(toolName)
    extra = _MODULE_STRIP.get(category, frozenset())
    stripFields = _STRIP_FIELDS | extra

    pruned = _pruneValue(data, stripFields, depth=0)
    text = json.dumps(pruned, ensure_ascii=False, indent=2, default=str)
    if len(text) > maxChars:
        return text[:maxChars] + "\n... (pruned+truncated)"
    return text
74
+
75
+
76
+ def _pruneValue(value: Any, stripFields: frozenset[str], depth: int) -> Any:
77
+ """์žฌ๊ท€์  ํ•„๋“œ ์ œ๊ฑฐ."""
78
+ if depth > 8:
79
+ return value
80
+ if isinstance(value, dict):
81
+ return {k: _pruneValue(v, stripFields, depth + 1) for k, v in value.items() if k not in stripFields}
82
+ if isinstance(value, list):
83
+ return [_pruneValue(item, stripFields, depth + 1) for item in value]
84
+ return value
85
+
86
+
87
+ def _resolveCategory(toolName: str) -> str:
88
+ """๋„๊ตฌ ์ด๋ฆ„์—์„œ ์นดํ…Œ๊ณ ๋ฆฌ ์ถ”์ถœ."""
89
+ if toolName in ("finance", "get_data", "compute_ratios"):
90
+ return "finance"
91
+ if toolName in ("explore", "show", "search_data"):
92
+ return "explore"
93
+ if toolName in ("report", "get_report"):
94
+ return "report"
95
+ return ""
src/dartlab/ai/conversation/prompts.py CHANGED
@@ -36,6 +36,14 @@ from .templates.analysis_rules import (
36
  from .templates.analysis_rules import (
37
  TOPIC_PROMPTS as _TOPIC_PROMPTS,
38
  )
 
 
 
 
 
 
 
 
39
  from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
40
  from .templates.self_critique import (
41
  SELF_CRITIQUE_PROMPT,
@@ -43,8 +51,6 @@ from .templates.self_critique import (
43
  from .templates.self_critique import (
44
  SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
45
  )
46
-
47
- # โ”€โ”€ ํ…œํ”Œ๋ฆฟ ๋ฐ์ดํ„ฐ ์ž„ํฌํŠธ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
48
  from .templates.system_base import (
49
  EDGAR_SUPPLEMENT_EN,
50
  EDGAR_SUPPLEMENT_KR,
@@ -63,6 +69,19 @@ _PLUGIN_SYSTEM_PROMPT = """
63
  - ๋ถ„์„ ์ค‘ ํ”Œ๋Ÿฌ๊ทธ์ธ ์ถ”์ฒœ ํžŒํŠธ๊ฐ€ ์ œ๊ณต๋˜๋ฉด, ๋‹ต๋ณ€ ๋์— ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์•ˆ๋‚ดํ•˜์„ธ์š”.
64
  """
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
67
  # ์งˆ๋ฌธ ๋ถ„๋ฅ˜
68
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
@@ -235,7 +254,7 @@ def build_system_prompt_parts(
235
 
236
  if compact:
237
  base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
238
- static_parts: list[str] = []
239
  dynamic_parts: list[str] = []
240
 
241
  benchmark_key = _match_sector(sector) if sector else None
@@ -259,7 +278,11 @@ def build_system_prompt_parts(
259
  if qt in _FEW_SHOT_COMPACT:
260
  static_parts.append(_FEW_SHOT_COMPACT[qt])
261
 
262
- # ๋™์ : report_mode + ํ”Œ๋Ÿฌ๊ทธ์ธ
 
 
 
 
263
  if report_mode:
264
  dynamic_parts.append(_REPORT_PROMPT_COMPACT)
265
 
@@ -284,10 +307,10 @@ def build_system_prompt_parts(
284
  base = SYSTEM_PROMPT_EN
285
  if not allow_tools:
286
  base = _strip_tool_guidance(base)
287
- static_parts = []
288
  dynamic_parts = []
289
 
290
- # ์ •์ : ๋ฒค์น˜๋งˆํฌ + ํ† ํ”ฝ + ๊ต์ฐจ๊ฒ€์ฆ + Few-shot
291
  benchmark_key = _match_sector(sector) if sector else None
292
  if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
293
  static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
@@ -314,7 +337,11 @@ def build_system_prompt_parts(
314
  edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
315
  static_parts.append(edgar_supp)
316
 
317
- # ๋™์ : report_mode + ํ”Œ๋Ÿฌ๊ทธ์ธ
 
 
 
 
318
  if report_mode:
319
  dynamic_parts.append(_REPORT_PROMPT)
320
 
 
36
  from .templates.analysis_rules import (
37
  TOPIC_PROMPTS as _TOPIC_PROMPTS,
38
  )
39
+
40
+ # โ”€โ”€ ํ…œํ”Œ๋ฆฟ ๋ฐ์ดํ„ฐ ์ž„ํฌํŠธ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
41
+ from .templates.analysisPhilosophy import (
42
+ ANALYSIS_PHILOSOPHY_COMPACT as _PHILOSOPHY_COMPACT,
43
+ )
44
+ from .templates.analysisPhilosophy import (
45
+ ANALYSIS_PHILOSOPHY_KR as _PHILOSOPHY_KR,
46
+ )
47
  from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
48
  from .templates.self_critique import (
49
  SELF_CRITIQUE_PROMPT,
 
51
  from .templates.self_critique import (
52
  SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
53
  )
 
 
54
  from .templates.system_base import (
55
  EDGAR_SUPPLEMENT_EN,
56
  EDGAR_SUPPLEMENT_KR,
 
69
  - ๋ถ„์„ ์ค‘ ํ”Œ๋Ÿฌ๊ทธ์ธ ์ถ”์ฒœ ํžŒํŠธ๊ฐ€ ์ œ๊ณต๋˜๋ฉด, ๋‹ต๋ณ€ ๋์— ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์•ˆ๋‚ดํ•˜์„ธ์š”.
70
  """
71
 
72
+ # โ”€โ”€ ์Šคํ‚ฌ ๋งค์นญ ํ—ฌํผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
73
+
74
+
75
+ def _matchSkillSafe(questionType: str | None, qTypes: list[str]) -> Any:
76
+ """์Šคํ‚ฌ ๋งค์นญ (import ์‹คํŒจ ์‹œ None)."""
77
+ try:
78
+ from dartlab.ai.skills.registry import matchSkill
79
+
80
+ return matchSkill("", questionType=questionType or (qTypes[0] if qTypes else None))
81
+ except Exception:
82
+ return None
83
+
84
+
85
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
86
  # ์งˆ๋ฌธ ๋ถ„๋ฅ˜
87
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
 
254
 
255
  if compact:
256
  base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
257
+ static_parts: list[str] = [_PHILOSOPHY_COMPACT]
258
  dynamic_parts: list[str] = []
259
 
260
  benchmark_key = _match_sector(sector) if sector else None
 
278
  if qt in _FEW_SHOT_COMPACT:
279
  static_parts.append(_FEW_SHOT_COMPACT[qt])
280
 
281
+ # ๋™์ : skill + report_mode + ํ”Œ๋Ÿฌ๊ทธ์ธ
282
+ _skill = _matchSkillSafe(question_type, q_types)
283
+ if _skill:
284
+ dynamic_parts.append(_skill.toPrompt())
285
+
286
  if report_mode:
287
  dynamic_parts.append(_REPORT_PROMPT_COMPACT)
288
 
 
307
  base = SYSTEM_PROMPT_EN
308
  if not allow_tools:
309
  base = _strip_tool_guidance(base)
310
+ static_parts = [_PHILOSOPHY_KR]
311
  dynamic_parts = []
312
 
313
+ # ์ •์ : ์ฒ ํ•™ + ๋ฒค์น˜๋งˆํฌ + ํ† ํ”ฝ + ๊ต์ฐจ๊ฒ€์ฆ + Few-shot
314
  benchmark_key = _match_sector(sector) if sector else None
315
  if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
316
  static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
 
337
  edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
338
  static_parts.append(edgar_supp)
339
 
340
+ # ๋™์ : skill + report_mode + ํ”Œ๋Ÿฌ๊ทธ์ธ
341
+ _skill = _matchSkillSafe(question_type, q_types)
342
+ if _skill:
343
+ dynamic_parts.append(_skill.toPrompt())
344
+
345
  if report_mode:
346
  dynamic_parts.append(_REPORT_PROMPT)
347
 
src/dartlab/ai/conversation/templates/analysisPhilosophy.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋ถ„์„ ์ฒ ํ•™ โ€” Palepu-Healy + CFA ํ”„๋ ˆ์ž„์›Œํฌ ๊ธฐ๋ฐ˜ ์‚ฌ๊ณ  ํ”„๋ ˆ์ž„.
2
+
3
+ ๊ธฐ์กด system_base.py์˜ 7๋‹จ๊ณ„ ํ”„๋ ˆ์ž„์›Œํฌ๋Š” "์–ด๋–ป๊ฒŒ ๋ถ„์„ํ•˜๋ผ"(์ ˆ์ฐจ).
4
+ ์ด ์ฒ ํ•™์€ "์–ด๋–ค ๊ด€์ ์œผ๋กœ ๋ณด๋ผ"(์‚ฌ๊ณ  ํ”„๋ ˆ์ž„)๋ฅผ ์ฃผ์ž…ํ•œ๋‹ค.
5
+ dexter์˜ SOUL.md ํŒจํ„ด์„ dartlab์— ์ ์šฉ.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ ANALYSIS_PHILOSOPHY_KR = """\
11
+ ## ๋ถ„์„ ์ฒ ํ•™
12
+
13
+ ### ์›์น™ 1: ์ˆซ์ž ๋’ค์˜ ์ด์•ผ๊ธฐ๋ฅผ ์ฝ์–ด๋ผ
14
+ ์žฌ๋ฌด์ œํ‘œ๋Š” ๊ฒฝ์˜ ์˜์‚ฌ๊ฒฐ์ •์˜ ๊ฒฐ๊ณผ๋ฌผ์ด๋‹ค. ์ˆ˜์น˜ ๋ณ€ํ™”๋ฅผ ๋ณด๋ฉด "์™œ?"๋ฅผ ๋ฐ˜๋“œ์‹œ ์ถ”์ ํ•˜๋ผ.
15
+ ๋งค์ถœ์ด ์˜ฌ๋ž๋‹ค๋ฉด โ†’ ๋ฌผ๋Ÿ‰์ธ๊ฐ€, ๋‹จ๊ฐ€์ธ๊ฐ€, ๋ฏน์Šค ๋ณ€ํ™”์ธ๊ฐ€?
16
+ ์ด์ต๋ฅ ์ด ๋–จ์–ด์กŒ๋‹ค๋ฉด โ†’ ์›๊ฐ€์ธ๊ฐ€, ํŒ๊ด€๋น„์ธ๊ฐ€, ์ผํšŒ์„ฑ์ธ๊ฐ€?
17
+
18
+ ### ์›์น™ 2: ์ด์ต์˜ ์งˆ์„ ์˜์‹ฌํ•˜๋ผ
19
+ ํšŒ๊ณ„ ์ด์ต๊ณผ ํ˜„๊ธˆ ์ด์ต์€ ๋‹ค๋ฅด๋‹ค.
20
+ - ์˜์—…CF๊ฐ€ ์ˆœ์ด์ต์„ ์ง€์†์ ์œผ๋กœ ํ•˜ํšŒํ•˜๋ฉด ๋ฐœ์ƒ์ฃผ์˜ ์ด์ต์„ ์˜์‹ฌํ•˜๋ผ
21
+ - ์šด์ „์ž๋ณธ ๋ณ€ํ™”, ๊ฐ๊ฐ€์ƒ๊ฐ ๋Œ€๋น„ CAPEX, ์ž๋ณธํ™” ์ •์ฑ…์„ ํ™•์ธํ•˜๋ผ
22
+ - Accrual Ratio๊ฐ€ ๋†’์œผ๋ฉด ์ด์ต์˜ ์ง€์†๊ฐ€๋Šฅ์„ฑ์— ๋ฌผ์Œํ‘œ๋ฅผ ๋ถ™์—ฌ๋ผ
23
+
24
+ ### ์›์น™ 3: ๊ตฌ์กฐ๋ฅผ ๋ถ„ํ•ดํ•˜๋ผ
25
+ - ROE๋Š” DuPont์œผ๋กœ ๋ถ„ํ•ด: ์ˆ˜์ต์„ฑ ร— ํšจ์œจ์„ฑ ร— ๋ ˆ๋ฒ„๋ฆฌ์ง€
26
+ - ๋งค์ถœ์€ ๋ถ€๋ฌธ๋ณ„, ์ง€์—ญ๋ณ„, ์ œํ’ˆ๋ณ„๋กœ ๋ถ„ํ•ด
27
+ - ๋น„์šฉ์€ ์„ฑ๊ฒฉ๋ณ„(์›์žฌ๋ฃŒ/์ธ๊ฑด๋น„/๊ฐ๊ฐ€)๋กœ ๋ถ„ํ•ด
28
+ - ํ•ฉ์‚ฐ ์ˆซ์ž๋งŒ ๋ณด๋ฉด ๊ตฌ์กฐ ๋ณ€ํ™”๋ฅผ ๋†“์นœ๋‹ค
29
+
30
+ ### ์›์น™ 4: ๊ต์ฐจ๊ฒ€์ฆํ•˜๋ผ
31
+ - ๊ณต์‹œ ์„œ์ˆ ๊ณผ ์žฌ๋ฌด ์ˆ˜์น˜๊ฐ€ ์ผ์น˜ํ•˜๋Š”์ง€ ํ™•์ธ
32
+ - ๊ฒฝ์˜์ง„ ์ฝ”๋ฉ˜ํŠธ์™€ ์‹ค์ œ ์ž๋ณธ ๋ฐฐ๋ถ„์ด ๋ถ€ํ•ฉํ•˜๋Š”์ง€ ํ™•์ธ
33
+ - ๋ถ€๋ฌธ ํ•ฉ์‚ฐ๊ณผ ์—ฐ๊ฒฐ ์ˆ˜์น˜๊ฐ€ ์ •ํ•ฉํ•˜๋Š”์ง€ ํ™•์ธ
34
+ - ๋ถˆ์ผ์น˜๊ฐ€ ์žˆ์œผ๋ฉด ๋ช…์‹œ์ ์œผ๋กœ ์ง€์ ํ•˜๋ผ
35
+
36
+ ### ์›์น™ 5: ์‹œ๊ฐ„์ถ•์œผ๋กœ ํŒ๋‹จํ•˜๋ผ
37
+ - ๋‹จ์ผ ๋ถ„๊ธฐ ์Šค๋ƒ…์ƒท์ด ์•„๋‹ˆ๋ผ 3~5๋…„ ์ถ”์„ธ๋กœ ํŒ๋‹จ
38
+ - ์ผํšŒ์„ฑ๊ณผ ๋ฐ˜๋ณต์„ฑ์„ ๋ถ„๋ฆฌ
39
+ - ์„ฑ์žฅ์ด ์œ ๊ธฐ์ ์ธ์ง€ ์ธ์ˆ˜์— ์˜ํ•œ ๊ฒƒ์ธ์ง€ ๊ตฌ๋ถ„
40
+ - ๋ฏธ๋ž˜ ์ถ”์ •์€ ๊ณผ๊ฑฐ ์ถ”์„ธ์˜ ์—ฐ์žฅ์ด ์•„๋‹ˆ๋ผ ๊ตฌ์กฐ์  ๋ณ€ํ™”๋ฅผ ๋ฐ˜์˜
41
+
42
+ ### ์›์น™ 6: ๋ฆฌ์Šคํฌ๋ฅผ ๋จผ์ € ์ฐพ์•„๋ผ
43
+ - "์ด ํšŒ์‚ฌ๊ฐ€ ์™œ ์ข‹์€๊ฐ€"๋ณด๋‹ค "๋ฌด์—‡์ด ์ž˜๋ชป๋  ์ˆ˜ ์žˆ๋Š”๊ฐ€"๋ฅผ ๋จผ์ € ํƒ์ƒ‰
44
+ - ๊ฐ์‚ฌ์˜๊ฒฌ ๋ณ€ํ™”, ํŠน์ˆ˜๊ด€๊ณ„์ž ๊ฑฐ๋ž˜, ํšŒ๊ณ„์ •์ฑ… ๋ณ€๊ฒฝ์„ ์ฃผ์‹œ
45
+ - ๋ถ€์ฑ„ ๋งŒ๊ธฐ ๊ตฌ์กฐ์™€ ์ด์ž๋ณด์ƒ๋ฐฐ์œจ์„ ํ•จ๊ป˜ ํ™•์ธ
46
+ - ์ง‘์ค‘ ๋ฆฌ์Šคํฌ(๋งค์ถœ์ฒ˜, ๊ณต๊ธ‰์ฒ˜, ์ง€์—ญ)๋ฅผ ํŒŒ์•…
47
+ """
48
+
49
+ ANALYSIS_PHILOSOPHY_COMPACT = """\
50
+ ## ๋ถ„์„ ์›์น™
51
+ 1. ์ˆซ์ž ๋’ค์˜ "์™œ?"๋ฅผ ์ถ”์  (๋งค์ถœ=๋ฌผ๋Ÿ‰ร—๋‹จ๊ฐ€ร—๋ฏน์Šค, ๋น„์šฉ=์›๊ฐ€+ํŒ๊ด€๋น„)
52
+ 2. ์ด์ต์˜ ์งˆ ์˜์‹ฌ (CF vs NI, Accrual Ratio, ์šด์ „์ž๋ณธ ๋ณ€ํ™”)
53
+ 3. DuPont/๋ถ€๋ฌธ/์„ฑ๊ฒฉ๋ณ„ ๋ถ„ํ•ด โ€” ํ•ฉ์‚ฐ๋งŒ ๋ณด๋ฉด ๊ตฌ์กฐ ๋ณ€ํ™”๋ฅผ ๋†“์นจ
54
+ 4. ๊ณต์‹œ ์„œ์ˆ  โ†” ์žฌ๋ฌด ์ˆ˜์น˜ ๊ต์ฐจ๊ฒ€์ฆ โ€” ๋ถˆ์ผ์น˜ ์‹œ ๋ช…์‹œ์  ์ง€์ 
55
+ 5. 3~5๋…„ ์ถ”์„ธ ํŒ๋‹จ โ€” ์ผํšŒ์„ฑ vs ๋ฐ˜๋ณต์„ฑ ๋ถ„๋ฆฌ
56
+ 6. "๋ฌด์—‡์ด ์ž˜๋ชป๋  ์ˆ˜ ์žˆ๋Š”๊ฐ€?" ๋จผ์ € ํƒ์ƒ‰ โ€” ๋ฆฌ์Šคํฌ ์„ ํ–‰
57
+ """
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_124945.jsonl ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"caseId": "researchGather.structure.recentDisclosures", "persona": "research_gather", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
2
+ {"caseId": "accountant.costByNature.summary", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
3
+ {"caseId": "accountant.audit.redFlags", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
4
+ {"caseId": "investor.dividend.sustainability", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
5
+ {"caseId": "investor.downside.risks", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
6
+ {"caseId": "investor.distress.sdi", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
7
+ {"caseId": "analyst.margin.drivers", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
8
+ {"caseId": "analyst.segments.lgchem", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
9
+ {"caseId": "analyst.evidence.recentDisclosures", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
10
+ {"caseId": "accountant.ambiguous.costStructure", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 5.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
11
+ {"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 4.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
12
+ {"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 4.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
13
+ {"caseId": "investor.profitMargin.context", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
14
+ {"caseId": "investor.growth.cashflowTrend", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
15
+ {"caseId": "analyst.growth.futurePlan", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
16
+ {"caseId": "investor.growth.revenueGrowth", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
17
+ {"caseId": "analyst.valuation.perComparison", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
18
+ {"caseId": "investor.valuation.intrinsicValue", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
19
+ {"caseId": "analyst.valuation.roe", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
20
+ {"caseId": "investor.report.majorHolder", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
21
+ {"caseId": "accountant.report.executivePay", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
22
+ {"caseId": "analyst.context.evidenceCitation", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
23
+ {"caseId": "businessOwner.context.riskFactors", "persona": "business_owner", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
24
+ {"caseId": "investor.context.disclosureChange", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
25
+ {"caseId": "analyst.notes.rndExpense", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
26
+ {"caseId": "accountant.notes.tangibleAsset", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
27
+ {"caseId": "analyst.notes.segmentDetail", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
28
+ {"caseId": "accountant.edge.financialCompany", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
29
+ {"caseId": "accountant.cost.rndRatio", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
30
+ {"caseId": "analyst.cost.opexBreakdown", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
31
+ {"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
32
+ {"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
33
+ {"caseId": "investor.followup.deeperDividend", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
34
+ {"caseId": "analyst.followup.whyMarginDrop", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["clarification_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
35
+ {"caseId": "accountant.stability.debtAnalysis", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_131602.jsonl ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.0, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure"], "answerLength": 0, "timestamp": "20260327_131602"}
2
+ {"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 8.727272727272727, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.8181818181818182, "failureTypes": [], "answerLength": 739, "timestamp": "20260327_131602"}
3
+ {"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 10.083333333333332, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 687, "timestamp": "20260327_131602"}
4
+ {"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 918, "timestamp": "20260327_131602"}
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_132810.jsonl ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"caseId": "analyst.margin.drivers", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.083333333333334, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 186, "timestamp": "20260327_132810"}
2
+ {"caseId": "analyst.segments.lgchem", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.25, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 407, "timestamp": "20260327_132810"}
3
+ {"caseId": "analyst.evidence.recentDisclosures", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 310, "timestamp": "20260327_132810"}
4
+ {"caseId": "analyst.growth.futurePlan", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.5, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 319, "timestamp": "20260327_132810"}
5
+ {"caseId": "analyst.valuation.perComparison", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_132810"}
6
+ {"caseId": "analyst.valuation.roe", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 10.537878787878789, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 375, "timestamp": "20260327_132810"}
7
+ {"caseId": "analyst.context.evidenceCitation", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.916666666666668, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 804, "timestamp": "20260327_132810"}
8
+ {"caseId": "analyst.notes.rndExpense", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.291666666666666, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 61, "timestamp": "20260327_132810"}
9
+ {"caseId": "analyst.notes.segmentDetail", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_132810"}
10
+ {"caseId": "analyst.cost.opexBreakdown", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.0, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 235, "timestamp": "20260327_132810"}
11
+ {"caseId": "analyst.followup.whyMarginDrop", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 10.333333333333334, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["clarification_failure", "retrieval_failure"], "answerLength": 872, "timestamp": "20260327_132810"}
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_124945.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Eval 진단 리포트 — 2026-03-27 12:49
2
+
3
+ ## 약점 유형 (하위 점수)
4
+
5
+ | 유형 | 평균 점수 | 케이스 수 | 주요 실패 |
6
+ |------|---------|---------|---------|
7
+ | unknown | 5.98 | 35 | runtime_error, retrieval_failure, empty_answer |
8
+
9
+
10
+ # 개선 계획 (Remediation)
11
+
12
+ | 우선순위 | Failure | 대상 파일 | 설명 | 영향도 |
13
+ |---------|---------|----------|------|-------|
14
+ | P1 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES 매핑에 모듈 추가 (발생 20회) | high |
15
+ | P3 | generation_failure | `engines/ai/conversation/templates/analysis_rules.py` | 분석 규칙에 few-shot 예시 추가 (발생 2회) | medium |
16
+ | P4 | clarification_failure | `engines/ai/conversation/system_base.py` | clarification 정책 조건 수정 (발생 1회) | low |
17
+ | P5 | empty_answer | `(매핑 없음)` | 새 failure 유형 — 매핑 추가 필요 (발생 15회) | unknown |
18
+ | P5 | runtime_error | `(매핑 없음)` | 새 failure 유형 — 매핑 추가 필요 (발생 35회) | unknown |
19
+
20
+ **즉시 조치 필요**: 1건
21
+ - [retrieval_failure] → `engines/ai/context/finance_context.py`
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_131602.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Eval 진단 리포트 — 2026-03-27 13:16
2
+
3
+ ## 약점 유형 (하위 점수)
4
+
5
+ | 유형 | 평균 점수 | 케이스 수 | 주요 실패 |
6
+ |------|---------|---------|---------|
7
+ | unknown | 7.33 | 4 | generation_failure, retrieval_failure |
8
+
9
+
10
+ # 개선 계획 (Remediation)
11
+
12
+ | 우선순위 | Failure | 대상 파일 | 설명 | 영향도 |
13
+ |---------|---------|----------|------|-------|
14
+ | P3 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES 매핑에 모듈 추가 (발생 1회) | high |
15
+ | P4 | generation_failure | `engines/ai/conversation/templates/analysis_rules.py` | 분석 규칙에 few-shot 예시 추가 (발생 1회) | medium |
src/dartlab/ai/memory/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """์„ธ์…˜ ๊ฐ„ ๋ถ„์„ ๋ฉ”๋ชจ๋ฆฌ โ€” SQLite ๊ธฐ๋ฐ˜.
2
+
3
+ ์ข…๋ชฉ๋ณ„ ๋ถ„์„ ํžˆ์Šคํ† ๋ฆฌ๋ฅผ ์˜์†ํ•˜์—ฌ ์žฌ๋ถ„์„ ์‹œ ์ด์ „ ๋งฅ๋ฝ์„ ํ™œ์šฉํ•œ๋‹ค.
4
+ """
5
+
6
+ from dartlab.ai.memory.store import AnalysisMemory
7
+
8
+ __all__ = ["AnalysisMemory"]
src/dartlab/ai/memory/store.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋ถ„์„ ๋ฉ”๋ชจ๋ฆฌ ์ €์žฅ์†Œ โ€” SQLite ๊ธฐ๋ฐ˜ ์„ธ์…˜ ๊ฐ„ ์˜์†.
2
+
3
+ Company ๊ฐ์ฒด(200~500MB)๋Š” ์ €์žฅํ•˜์ง€ ์•Š๋Š”๋‹ค.
4
+ stockCode + ์‹œ์  + ์งˆ๋ฌธ ์š”์•ฝ + ๊ฒฐ๊ณผ ์š”์•ฝ๋งŒ ์ €์žฅํ•˜์—ฌ ๋ฉ”๋ชจ๋ฆฌ ์•ˆ์ „.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sqlite3
10
+ import time
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+
14
+ _DB_FILENAME = "analysisMemory.db"
15
+ _MAX_DB_SIZE_MB = 50
16
+ _MAX_SUMMARY_CHARS = 500
17
+
18
+ # ์‹ฑ๊ธ€ํ„ด ์ธ์Šคํ„ด์Šค
19
+ _instance: AnalysisMemory | None = None
20
+
21
+
22
@dataclass(frozen=True)
class MemoryRecord:
    """Immutable view of one stored analysis row."""

    # Stock code the analysis was saved under.
    stockCode: str
    # Question text as stored (saveAnalysis keeps at most the first 200 characters).
    question: str
    # Question category label; stored as an empty string when not supplied.
    questionType: str
    # Short summary of the answer; saveAnalysis truncates it before storing.
    resultSummary: str
    # Save time as written by saveAnalysis via time.time().
    timestamp: float
    # Grade string, or None when the stored grade column is empty.
    grade: str | None = None
32
+
33
+
34
class AnalysisMemory:
    """SQLite-backed store of per-stock analysis history.

    Only small text fields are persisted (stock code, question, question
    type, result summary, grade, timestamp). The connection is opened
    lazily on first use and reused afterwards.
    """

    def __init__(self, dbPath: Path | None = None) -> None:
        """Remember the database location without opening it.

        Args:
            dbPath: SQLite file path. Defaults to
                ``~/.dartlab/<_DB_FILENAME>`` when omitted.
        """
        if dbPath is None:
            dbPath = Path.home() / ".dartlab" / _DB_FILENAME
        self._dbPath = dbPath
        self._conn: sqlite3.Connection | None = None

    def _ensureDb(self) -> sqlite3.Connection:
        """Open the database on first use, create the schema, and cache the connection.

        Returns:
            The cached connection.

        Raises:
            OSError: If the parent directory cannot be created.
            sqlite3.Error: If connecting or creating the schema fails.
        """
        if self._conn is not None:
            return self._conn

        self._dbPath.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self._dbPath), timeout=5)
        try:
            conn.execute(
                """CREATE TABLE IF NOT EXISTS analysis (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    stockCode TEXT NOT NULL,
                    question TEXT NOT NULL,
                    questionType TEXT DEFAULT '',
                    resultSummary TEXT DEFAULT '',
                    grade TEXT DEFAULT '',
                    timestamp REAL NOT NULL
                )"""
            )
            conn.execute("CREATE INDEX IF NOT EXISTS idx_stock ON analysis(stockCode)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON analysis(timestamp)")
            conn.commit()
        except sqlite3.Error:
            # Do not leak a half-initialised connection; the next call retries from scratch.
            conn.close()
            raise
        self._conn = conn
        return conn

    def saveAnalysis(
        self,
        stockCode: str,
        question: str,
        questionType: str = "",
        resultSummary: str = "",
        grade: str | None = None,
    ) -> None:
        """Insert one analysis row, then apply the database size limit.

        Args:
            stockCode: Stock code the analysis belongs to.
            question: Question text; only the first 200 characters are stored.
            questionType: Question category label.
            resultSummary: Answer summary; truncated to ``_MAX_SUMMARY_CHARS``.
            grade: Optional grade; stored as an empty string when missing.
        """
        conn = self._ensureDb()
        summary = resultSummary[:_MAX_SUMMARY_CHARS] if resultSummary else ""
        conn.execute(
            "INSERT INTO analysis (stockCode, question, questionType, resultSummary, grade, timestamp) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (stockCode, question[:200], questionType, summary, grade or "", time.time()),
        )
        conn.commit()
        self._enforceSizeLimit(conn)

    def recallForStock(
        self,
        stockCode: str,
        limit: int = 5,
        decayDays: int = 90,
    ) -> list[MemoryRecord]:
        """Return the most recent records for a stock, newest first.

        Args:
            stockCode: Stock code to look up.
            limit: Maximum number of records returned.
            decayDays: Records older than this many days are ignored.

        Returns:
            Up to ``limit`` records ordered by timestamp descending.
        """
        conn = self._ensureDb()
        cutoff = time.time() - (decayDays * 86400)
        rows = conn.execute(
            "SELECT stockCode, question, questionType, resultSummary, timestamp, grade "
            "FROM analysis WHERE stockCode = ? AND timestamp > ? "
            "ORDER BY timestamp DESC LIMIT ?",
            (stockCode, cutoff, limit),
        ).fetchall()
        return [
            MemoryRecord(
                stockCode=r[0],
                question=r[1],
                questionType=r[2],
                resultSummary=r[3],
                timestamp=r[4],
                # An empty grade column is reported as "no grade".
                grade=r[5] or None,
            )
            for r in rows
        ]

    def toPromptContext(self, stockCode: str) -> str:
        """Render the recalled records for a stock as a markdown block for a prompt.

        Returns:
            A heading followed by one bullet per record (plus an indented
            summary line when a summary exists), or ``""`` when there are
            no records.
        """
        records = self.recallForStock(stockCode)
        if not records:
            return ""

        # Imported once per call instead of once per record.
        import datetime

        lines = ["## ์ด์ „ ๋ถ„์„ ๊ธฐ๋ก"]
        for r in records:
            dt = datetime.datetime.fromtimestamp(r.timestamp).strftime("%Y-%m-%d")
            grade_str = f" [๋“ฑ๊ธ‰: {r.grade}]" if r.grade else ""
            lines.append(f"- **{dt}** ({r.questionType}){grade_str}: {r.question}")
            if r.resultSummary:
                lines.append(f" โ†’ {r.resultSummary[:200]}")
        return "\n".join(lines)

    def _enforceSizeLimit(self, conn: sqlite3.Connection) -> None:
        """Delete the 100 oldest rows and compact the file when it exceeds ``_MAX_DB_SIZE_MB``.

        A file that cannot be stat'ed is treated as "nothing to do".
        """
        try:
            dbSize = self._dbPath.stat().st_size / (1024 * 1024)
        except OSError:
            return
        if dbSize <= _MAX_DB_SIZE_MB:
            return

        conn.execute(
            "DELETE FROM analysis WHERE id IN (SELECT id FROM analysis ORDER BY timestamp ASC LIMIT 100)"
        )
        # The DELETE opens an implicit transaction, and SQLite refuses to VACUUM
        # inside one, so the delete must be committed first.
        conn.commit()
        conn.execute("VACUUM")

    def close(self) -> None:
        """Close the cached connection, if any."""
        if self._conn:
            self._conn.close()
            self._conn = None
147
+
148
+
149
def getMemory() -> AnalysisMemory:
    """Return the shared AnalysisMemory, creating it on the first call."""
    global _instance
    if _instance is not None:
        return _instance
    _instance = AnalysisMemory()
    return _instance
src/dartlab/ai/memory/summarizer.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋ถ„์„ ๊ฒฐ๊ณผ ์š”์•ฝ๊ธฐ โ€” ๊ทœ์น™ ๊ธฐ๋ฐ˜ (LLM ํ˜ธ์ถœ ์—†์ด).
2
+
3
+ LLM ๋‹ต๋ณ€์—์„œ ์ €์žฅ์šฉ ์š”์•ฝ์„ ์ถ”์ถœํ•œ๋‹ค.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+
10
+
11
def summarizeResponse(response: str, maxChars: int = 500) -> str:
    """Extract a short summary from an LLM answer using rules only.

    Strategy, in order:
      1. The body of the first markdown section whose heading matches one of
         the conclusion-style keywords in the pattern below.
      2. The last paragraph that is neither a table (starts with ``|``) nor a
         fenced code block (starts with three backticks).
      3. The last paragraph, whatever it is.
      4. The whole response.

    Args:
        response: Full answer text.
        maxChars: Maximum length of the returned summary.

    Returns:
        The cleaned summary, or ``""`` for an empty response.
    """
    if not response:
        return ""

    conclusionMatch = re.search(
        r"(?:##?\s*(?:์ข…ํ•ฉ|๊ฒฐ๋ก |์š”์•ฝ|ํŒ๋‹จ|Bull|๊ฐ•์ ).*?\n)(.*?)(?:\n##|\Z)",
        response,
        re.DOTALL,
    )
    if conclusionMatch:
        sectionText = conclusionMatch.group(1).strip()
        # A heading followed directly by the next heading yields an empty body;
        # fall through to the paragraph heuristic rather than returning "".
        if sectionText:
            return _cleanText(sectionText, maxChars)

    paragraphs = [p.strip() for p in response.split("\n\n") if p.strip()]
    for p in reversed(paragraphs):
        if not p.startswith(("|", "```")):
            return _cleanText(p, maxChars)
    if paragraphs:
        # Every paragraph is a table or code block: use the last one anyway.
        return _cleanText(paragraphs[-1], maxChars)

    return _cleanText(response, maxChars)
37
+
38
+
39
def extractGrade(response: str) -> str | None:
    """Pull an overall letter grade (A-F, optional +/-) out of an answer.

    Returns:
        The matched grade such as ``"B+"``, or ``None`` when the labelled
        grade pattern does not occur in ``response``.
    """
    found = re.search(r"์ข…ํ•ฉ\s*(?:๋“ฑ๊ธ‰|์ ์ˆ˜)\s*[:๏ผš]\s*([A-F][+-]?)", response)
    return found.group(1) if found else None
46
+
47
+
48
+ def _cleanText(text: str, maxChars: int) -> str:
49
+ """๋งˆํฌ๋‹ค์šด ์ •๋ฆฌ + ๊ธธ์ด ์ œํ•œ."""
50
+ # ๋งˆํฌ๋‹ค์šด ํ—ค๋”, ๋ณผ๋“œ, ์ด๋ชจ์ง€ ์ œ๊ฑฐ
51
+ cleaned = re.sub(r"[#*_`]", "", text)
52
+ cleaned = re.sub(r"\s+", " ", cleaned).strip()
53
+ if len(cleaned) > maxChars:
54
+ return cleaned[: maxChars - 3] + "..."
55
+ return cleaned
src/dartlab/ai/providers/oauth_codex.py CHANGED
@@ -29,22 +29,68 @@ log = logging.getLogger(__name__)
29
  CODEX_API_BASE = "https://chatgpt.com/backend-api"
30
  CODEX_RESPONSES_PATH = "/codex/responses"
31
 
32
- AVAILABLE_MODELS = [
33
  "gpt-5.4",
34
- "gpt-5.3",
35
  "gpt-5.3-codex",
36
- "gpt-5.2",
37
  "gpt-5.2-codex",
38
- "gpt-5.1",
39
- "gpt-5.1-codex",
40
- "gpt-5.1-codex-mini",
41
- "o3",
42
- "o4-mini",
43
- "gpt-4.1",
44
- "gpt-4.1-mini",
45
- "gpt-4.1-nano",
46
  ]
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  class ChatGPTOAuthError(Exception):
50
  """ChatGPT OAuth provider ์—๋Ÿฌ โ€” action ํ•„๋“œ๋กœ ์‚ฌ์šฉ์ž ๋Œ€์‘ ์•ˆ๋‚ด."""
 
29
  CODEX_API_BASE = "https://chatgpt.com/backend-api"
30
  CODEX_RESPONSES_PATH = "/codex/responses"
31
 
32
+ _BUNDLED_MODELS = [
33
  "gpt-5.4",
 
34
  "gpt-5.3-codex",
 
35
  "gpt-5.2-codex",
36
+ "gpt-5.1-codex-max",
 
 
 
 
 
 
 
37
  ]
38
 
39
+ _MODELS_CACHE: list[str] | None = None
40
+ _MODELS_CACHE_TS: float = 0.0
41
+ _MODELS_CACHE_TTL = 300.0 # 5๋ถ„
42
+
43
+
44
def _fetchRemoteModels(token: str) -> list[str] | None:
    """Query the remote Codex models endpoint for the available model ids.

    Args:
        token: Bearer token sent in the Authorization header.

    Returns:
        The model ids found in the response, or ``None`` when the request
        fails, the status is not 200, the body is not usable JSON, or no
        ids were found.
    """
    url = f"{CODEX_API_BASE}/codex/models"
    headers = {
        "Authorization": f"Bearer {token}",
        "originator": "codex_cli_rs",
    }
    accountId = oauthToken.get_account_id()
    if accountId:
        headers["chatgpt-account-id"] = accountId

    try:
        resp = requests.get(url, headers=headers, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (requests.RequestException, json.JSONDecodeError, ValueError):
        return None

    # Accepted shapes: a bare list, or an object with a "models" or "data" list.
    items = data.get("models", data.get("data", [])) if isinstance(data, dict) else data
    if not isinstance(items, list):
        # Bodies such as null, a number or a string carry no model list;
        # report "unavailable" so the caller can fall back.
        return None

    models = []
    for item in items:
        modelId = (item.get("id") or item.get("model")) if isinstance(item, dict) else str(item)
        if modelId:
            models.append(modelId)
    return models or None
67
+
68
+
69
def availableModels() -> list[str]:
    """Return the usable model ids: cached, else remote, else the bundled list.

    A non-empty cache younger than ``_MODELS_CACHE_TTL`` seconds is returned
    as-is. Otherwise a valid token is requested and the remote list fetched;
    if either step yields nothing, a copy of ``_BUNDLED_MODELS`` is used.
    Whatever is returned is also stored in the module-level cache.
    """
    import time

    global _MODELS_CACHE, _MODELS_CACHE_TS
    now = time.time()
    cacheIsFresh = (now - _MODELS_CACHE_TS) < _MODELS_CACHE_TTL
    if _MODELS_CACHE and cacheIsFresh:
        return _MODELS_CACHE

    try:
        token = oauthToken.get_valid_token()
    except (TokenRefreshError, OSError):
        token = None

    models = _fetchRemoteModels(token) if token else None
    if not models:
        models = list(_BUNDLED_MODELS)

    _MODELS_CACHE = models
    _MODELS_CACHE_TS = now
    return models
93
+
94
 
95
  class ChatGPTOAuthError(Exception):
96
  """ChatGPT OAuth provider ์—๋Ÿฌ โ€” action ํ•„๋“œ๋กœ ์‚ฌ์šฉ์ž ๋Œ€์‘ ์•ˆ๋‚ด."""
src/dartlab/ai/runtime/agent.py CHANGED
@@ -10,6 +10,7 @@ import json
10
  from typing import Any, Callable, Generator
11
 
12
  from dartlab.ai.providers.base import BaseProvider
 
13
  from dartlab.ai.tools.registry import (
14
  build_tool_runtime,
15
  )
@@ -51,6 +52,7 @@ def agent_loop(
51
  """
52
  tool_runtime = runtime or build_tool_runtime(company, name="agent-loop")
53
  tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
 
54
 
55
  last_answer = ""
56
 
@@ -66,10 +68,17 @@ def agent_loop(
66
 
67
  # ๋„๊ตฌ ์‹คํ–‰ + ๊ฒฐ๊ณผ ์ถ”๊ฐ€
68
  for tc in response.tool_calls:
 
 
 
 
 
 
69
  if on_tool_call:
70
  on_tool_call(tc.name, tc.arguments)
71
 
72
  result = tool_runtime.execute_tool(tc.name, tc.arguments)
 
73
 
74
  if on_tool_result:
75
  on_tool_result(tc.name, result)
@@ -92,6 +101,21 @@ _REFLECTION_PROMPT = (
92
  )
93
 
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str:
96
  """๋‹ต๋ณ€ ์ž์ฒด ๊ฒ€์ฆ โ€” 1ํšŒ reflection์œผ๋กœ ํ’ˆ์งˆ ๋ณด์™„."""
97
  reflect_messages = [
@@ -123,6 +147,7 @@ def agent_loop_stream(
123
  """
124
  tool_runtime = runtime or build_tool_runtime(company, name="agent-stream")
125
  tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
 
126
 
127
  # ๋Œ€ํ™”ํ˜• ์งˆ๋ฌธ์€ ์ฒซ ํ„ด ๋„๊ตฌ ๊ฐ•์ œ ์•ˆ ํ•จ
128
  _isConversation = question_type in ("๋Œ€ํ™”", "๋ฉ”ํƒ€")
@@ -157,10 +182,17 @@ def agent_loop_stream(
157
  messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
158
 
159
  for tc in response.tool_calls:
 
 
 
 
 
 
160
  if on_tool_call:
161
  on_tool_call(tc.name, tc.arguments)
162
 
163
  result = tool_runtime.execute_tool(tc.name, tc.arguments)
 
164
 
165
  if on_tool_result:
166
  on_tool_result(tc.name, result)
@@ -259,7 +291,7 @@ def agent_loop_planning(
259
  steps = plan.get("steps", [])[:max_steps]
260
 
261
  # 2๋‹จ๊ณ„: ๊ณ„ํš ์ˆœ์ฐจ ์‹คํ–‰
262
- results: list[dict[str, str]] = []
263
  for step in steps:
264
  tool_name = step.get("tool", "")
265
  args = step.get("args", {})
@@ -268,17 +300,13 @@ def agent_loop_planning(
268
  on_tool_call(tool_name, args)
269
 
270
  result = tool_runtime.execute_tool(tool_name, args)
 
271
 
272
  if on_tool_result:
273
  on_tool_result(tool_name, result)
274
 
275
- results.append({"tool": tool_name, "result": result[:3000]})
276
-
277
  # 3๋‹จ๊ณ„: ์ข…ํ•ฉ ๋‹ต๋ณ€ ์ƒ์„ฑ
278
- synthesis_parts = [f"์งˆ๋ฌธ: {question}", "", "## ์ˆ˜์ง‘๋œ ๋ฐ์ดํ„ฐ:"]
279
- for r in results:
280
- synthesis_parts.append(f"\n### {r['tool']}")
281
- synthesis_parts.append(r["result"])
282
  synthesis_parts.append("\n## ์ง€์‹œ์‚ฌํ•ญ:")
283
  synthesis_parts.append(
284
  "์œ„ ๋ฐ์ดํ„ฐ๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๊ตฌ์กฐํ™”๋œ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•˜์„ธ์š”. "
@@ -291,3 +319,92 @@ def agent_loop_planning(
291
  ]
292
  final_resp = provider.complete(synth_messages)
293
  return final_resp.answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  from typing import Any, Callable, Generator
11
 
12
  from dartlab.ai.providers.base import BaseProvider
13
+ from dartlab.ai.runtime.scratchpad import Scratchpad
14
  from dartlab.ai.tools.registry import (
15
  build_tool_runtime,
16
  )
 
52
  """
53
  tool_runtime = runtime or build_tool_runtime(company, name="agent-loop")
54
  tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
55
+ pad = Scratchpad()
56
 
57
  last_answer = ""
58
 
 
68
 
69
  # ๋„๊ตฌ ์‹คํ–‰ + ๊ฒฐ๊ณผ ์ถ”๊ฐ€
70
  for tc in response.tool_calls:
71
+ # ์ค‘๋ณต ํ˜ธ์ถœ ๋ฐฉ์ง€
72
+ warning = pad.getDuplicateWarning(tc.name)
73
+ if warning:
74
+ messages.append(provider.format_tool_result(tc.id, warning))
75
+ continue
76
+
77
  if on_tool_call:
78
  on_tool_call(tc.name, tc.arguments)
79
 
80
  result = tool_runtime.execute_tool(tc.name, tc.arguments)
81
+ pad.addEntry(tc.name, tc.arguments, result)
82
 
83
  if on_tool_result:
84
  on_tool_result(tc.name, result)
 
101
  )
102
 
103
 
104
def _buildReflectionPrompt(questionType: str | None = None) -> str:
    """Build the reflection prompt, appending skill checkpoints when a skill matches.

    The base prompt is returned unchanged when no skill matches, the matched
    skill has no checkpoints, or the skill lookup raises.
    """
    prompt = _REFLECTION_PROMPT
    try:
        from dartlab.ai.skills.registry import matchSkill

        matched = matchSkill("", questionType=questionType)
        if not matched or not matched.checkpoints:
            return prompt
        bullets = "\n".join(f"- {item}" for item in matched.checkpoints)
        return prompt + f"\n\n**์ถ”๊ฐ€ ๊ฒ€์ฆ ๊ธฐ์ค€ ({matched.name}):**\n{bullets}"
    except Exception:
        # Best effort: any failure in the skill lookup falls back to the base prompt.
        return prompt
117
+
118
+
119
  def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str:
120
  """๋‹ต๋ณ€ ์ž์ฒด ๊ฒ€์ฆ โ€” 1ํšŒ reflection์œผ๋กœ ํ’ˆ์งˆ ๋ณด์™„."""
121
  reflect_messages = [
 
147
  """
148
  tool_runtime = runtime or build_tool_runtime(company, name="agent-stream")
149
  tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
150
+ pad = Scratchpad()
151
 
152
  # ๋Œ€ํ™”ํ˜• ์งˆ๋ฌธ์€ ์ฒซ ํ„ด ๋„๊ตฌ ๊ฐ•์ œ ์•ˆ ํ•จ
153
  _isConversation = question_type in ("๋Œ€ํ™”", "๋ฉ”ํƒ€")
 
182
  messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
183
 
184
  for tc in response.tool_calls:
185
+ # ์ค‘๋ณต ํ˜ธ์ถœ ๋ฐฉ์ง€
186
+ warning = pad.getDuplicateWarning(tc.name)
187
+ if warning:
188
+ messages.append(provider.format_tool_result(tc.id, warning))
189
+ continue
190
+
191
  if on_tool_call:
192
  on_tool_call(tc.name, tc.arguments)
193
 
194
  result = tool_runtime.execute_tool(tc.name, tc.arguments)
195
+ pad.addEntry(tc.name, tc.arguments, result)
196
 
197
  if on_tool_result:
198
  on_tool_result(tc.name, result)
 
291
  steps = plan.get("steps", [])[:max_steps]
292
 
293
  # 2๋‹จ๊ณ„: ๊ณ„ํš ์ˆœ์ฐจ ์‹คํ–‰
294
+ pad = Scratchpad()
295
  for step in steps:
296
  tool_name = step.get("tool", "")
297
  args = step.get("args", {})
 
300
  on_tool_call(tool_name, args)
301
 
302
  result = tool_runtime.execute_tool(tool_name, args)
303
+ pad.addEntry(tool_name, args, result)
304
 
305
  if on_tool_result:
306
  on_tool_result(tool_name, result)
307
 
 
 
308
  # 3๋‹จ๊ณ„: ์ข…ํ•ฉ ๋‹ต๋ณ€ ์ƒ์„ฑ
309
+ synthesis_parts = [f"์งˆ๋ฌธ: {question}", "", "## ์ˆ˜์ง‘๋œ ๋ฐ์ดํ„ฐ:", pad.toContext()]
 
 
 
310
  synthesis_parts.append("\n## ์ง€์‹œ์‚ฌํ•ญ:")
311
  synthesis_parts.append(
312
  "์œ„ ๋ฐ์ดํ„ฐ๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๊ตฌ์กฐํ™”๋œ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•˜์„ธ์š”. "
 
319
  ]
320
  final_resp = provider.complete(synth_messages)
321
  return final_resp.answer
322
+
323
+
324
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
325
+ # ์ž์œจ ํƒ์ƒ‰ ์—์ด์ „ํŠธ (Tier 2 โ€” ์™„์ „ ๋ถ„์„)
326
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
327
+
328
+ _SUFFICIENCY_HINT = (
329
+ "\n\n---\n"
330
+ "**์•ˆ๋‚ด**: ์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๋ฅผ ์ˆ˜์ง‘ํ–ˆ๋‹ค๋ฉด ๋„๊ตฌ๋ฅผ ๋” ํ˜ธ์ถœํ•˜์ง€ ๋ง๊ณ  ์ตœ์ข… ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•˜์„ธ์š”. "
331
+ "์•„์ง ๋ถ€์กฑํ•˜๋ฉด ์ถ”๊ฐ€ ๋„๊ตฌ๋ฅผ ํ˜ธ์ถœํ•˜์„ธ์š”."
332
+ )
333
+
334
+
335
def agentLoopAutonomous(
    provider: BaseProvider,
    messages: list[dict],
    company: Any,
    *,
    maxTurns: int = 15,
    maxTools: int | None = None,
    runtime: ToolRuntime | None = None,
    onToolCall: Callable[[str, dict], None] | None = None,
    onToolResult: Callable[[str, str], None] | None = None,
    questionType: str | None = None,
    forceToolFirstTurn: bool = True,
) -> Generator[str, None, None]:
    """Run the tool-calling loop until the model stops requesting tools.

    Each turn asks the provider for tool calls. When none are requested the
    answer is yielded (or streamed) and the generator ends. Otherwise each
    tool is executed, its result recorded in a scratchpad and appended to
    ``messages``. From the third turn on, a usage summary plus a sufficiency
    hint is appended as a user message. When ``maxTurns`` is exhausted, a
    final synthesis request is appended and the provider's stream is yielded.

    Args:
        provider: LLM provider used for completion, streaming and message formatting.
        messages: Conversation so far; mutated in place.
        company: Passed to the tool runtime builder; ``None`` disables the forced first tool call.
        maxTurns: Maximum number of tool-calling turns.
        maxTools: Forwarded to ``selectTools``.
        runtime: Pre-built tool runtime; one is built when omitted.
        onToolCall: Called with (name, arguments) before a tool runs.
        onToolResult: Called with (name, result) after a tool runs.
        questionType: Question category, used for tool selection and first-turn forcing.
        forceToolFirstTurn: Request ``tool_choice="any"`` on the first turn when applicable.

    Yields:
        Answer text chunks.
    """
    activeRuntime = runtime or build_tool_runtime(company, name="agent-autonomous")
    toolSpecs = selectTools(activeRuntime, questionType=questionType, maxTools=maxTools, hasCompany=company is not None)
    pad = Scratchpad(tokenBudget=12000)

    isConversational = questionType in ("๋Œ€ํ™”", "๋ฉ”ํƒ€")
    forceOnFirstTurn = forceToolFirstTurn and not isConversational and company is not None

    for turnIndex in range(maxTurns):
        extraArgs: dict = {"tool_choice": "any"} if (turnIndex == 0 and forceOnFirstTurn) else {}

        try:
            response = provider.complete_with_tools(messages, toolSpecs, **extraArgs)
        except TypeError:
            # Retry without the extra keyword arguments.
            response = provider.complete_with_tools(messages, toolSpecs)

        if not response.tool_calls:
            # A direct answer is only reused after the first turn; otherwise stream.
            if turnIndex > 0 and response.answer and response.answer.strip():
                yield response.answer
            else:
                yield from provider.stream(messages)
            return

        messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))

        for call in response.tool_calls:
            duplicateNotice = pad.getDuplicateWarning(call.name)
            if duplicateNotice:
                # The warning is returned as the tool result and the tool is not executed.
                messages.append(provider.format_tool_result(call.id, duplicateNotice))
                continue

            if onToolCall:
                onToolCall(call.name, call.arguments)

            output = activeRuntime.execute_tool(call.name, call.arguments)
            pad.addEntry(call.name, call.arguments, output)

            if onToolResult:
                onToolResult(call.name, output)

            messages.append(provider.format_tool_result(call.id, output))

        if turnIndex >= 2:
            messages.append({"role": "user", "content": pad.getUsageSummary() + _SUFFICIENCY_HINT})

    # Turn budget exhausted: ask for a final synthesis of what was collected.
    synthPrompt = (
        f"๋„๊ตฌ ํ˜ธ์ถœ์ด ์ตœ๋Œ€ {maxTurns}ํ„ด์— ๋„๋‹ฌํ–ˆ์Šต๋‹ˆ๋‹ค. "
        "์ง€๊ธˆ๊นŒ์ง€ ์ˆ˜์ง‘ํ•œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์ตœ์ข… ์ข…ํ•ฉ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•˜์„ธ์š”.\n\n"
        f"{pad.getUsageSummary()}"
    )
    messages.append({"role": "user", "content": synthPrompt})
    yield from provider.stream(messages)
src/dartlab/ai/runtime/core.py CHANGED
@@ -19,6 +19,7 @@ dartlab.ask(), server UI, CLI๊ฐ€ ๋ชจ๋‘ ์ด ์ฝ”์–ด๋ฅผ ์†Œ๋น„ํ•œ๋‹ค.
19
 
20
  from __future__ import annotations
21
 
 
22
  from typing import Any, Generator
23
 
24
  from dartlab.ai.runtime.events import AnalysisEvent
@@ -30,6 +31,7 @@ from dartlab.ai.runtime.post_processing import (
30
  )
31
  from dartlab.ai.runtime.run_modes import (
32
  _run_agent,
 
33
  _run_light_mode,
34
  _run_stream,
35
  )
@@ -99,6 +101,7 @@ def _build_included_evidence(included_tables: list[str]) -> list[dict[str, str]]
99
  "BS_quarterly": "๋ถ„๊ธฐ๋ณ„ ์žฌ๋ฌด์ƒํƒœํ‘œ",
100
  "_dart_openapi_filings": "์ตœ๊ทผ ๊ณต์‹œ ๋ชฉ๋ก",
101
  "_diff": "๊ณต์‹œ ๋ณ€ํ™” ๋น„๊ต",
 
102
  "_response_contract": "์‘๋‹ต ๊ณ„์•ฝ",
103
  "_clarify": "ํ™•์ธ ์งˆ๋ฌธ",
104
  }
@@ -147,6 +150,7 @@ def _context_label(module_name: str, explicit_label: str | None = None) -> str |
147
  "segments": "์‚ฌ์—…๋ถ€๋ฌธ ๋ฐ์ดํ„ฐ",
148
  "_dart_openapi_filings": "์ตœ๊ทผ ๊ณต์‹œ ๋ชฉ๋ก",
149
  "_diff": "๊ณต์‹œ ๋ณ€ํ™” ๋น„๊ต",
 
150
  }.items()
151
  if normalized == key or module_name == key
152
  ),
@@ -828,6 +832,17 @@ def _analyze_inner(
828
  dataReadyBlock = f"๋ฐ์ดํ„ฐ ๊ฐ€์šฉ์„ฑ\n{dataReadySummary}"
829
  dynamic_part = f"{dynamic_part}\n\n{dataReadyBlock}" if dynamic_part else dataReadyBlock
830
 
 
 
 
 
 
 
 
 
 
 
 
831
  if dialogue_policy:
832
  dynamic_part = dynamic_part + "\n\n" + dialogue_policy if dynamic_part else dialogue_policy
833
 
@@ -885,12 +900,17 @@ def _analyze_inner(
885
  # ๋ชจ๋“  provider์—์„œ Super Tool ๋ชจ๋“œ ๊ธฐ๋ณธ ํ™œ์„ฑํ™” โ€” 8๊ฐœ ๋„๊ตฌ๋กœ ํ†ตํ•ฉ
886
  _useSuperTools = True
887
  effective_turns = max(max_turns, _estimate_max_turns(question, q_type or ""))
888
- for _ev in _run_agent(
 
 
 
 
 
889
  llm,
890
  messages,
891
  company,
892
  question,
893
- max_turns=effective_turns,
894
  max_tools=max_tools,
895
  q_type=q_type,
896
  useSuperTools=_useSuperTools,
@@ -932,6 +952,24 @@ def _analyze_inner(
932
  if response_meta.get("grade") or response_meta.get("has_conclusion"):
933
  _done_payload["responseMeta"] = response_meta
934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
935
  # โ”€โ”€ 15. Meta ์—…๋ฐ์ดํŠธ (includedModules, yearRange) โ”€โ”€
936
  if _included_tables:
937
  includedEvidence = _build_included_evidence(_included_tables)
 
19
 
20
  from __future__ import annotations
21
 
22
+ import sqlite3
23
  from typing import Any, Generator
24
 
25
  from dartlab.ai.runtime.events import AnalysisEvent
 
31
  )
32
  from dartlab.ai.runtime.run_modes import (
33
  _run_agent,
34
+ _run_agent_autonomous,
35
  _run_light_mode,
36
  _run_stream,
37
  )
 
101
  "BS_quarterly": "๋ถ„๊ธฐ๋ณ„ ์žฌ๋ฌด์ƒํƒœํ‘œ",
102
  "_dart_openapi_filings": "์ตœ๊ทผ ๊ณต์‹œ ๋ชฉ๋ก",
103
  "_diff": "๊ณต์‹œ ๋ณ€ํ™” ๋น„๊ต",
104
+ "_changes": "๊ณต์‹œ ๋ณ€ํ™” ์š”์•ฝ",
105
  "_response_contract": "์‘๋‹ต ๊ณ„์•ฝ",
106
  "_clarify": "ํ™•์ธ ์งˆ๋ฌธ",
107
  }
 
150
  "segments": "์‚ฌ์—…๋ถ€๋ฌธ ๋ฐ์ดํ„ฐ",
151
  "_dart_openapi_filings": "์ตœ๊ทผ ๊ณต์‹œ ๋ชฉ๋ก",
152
  "_diff": "๊ณต์‹œ ๋ณ€ํ™” ๋น„๊ต",
153
+ "_changes": "๊ณต์‹œ ๋ณ€ํ™” ์š”์•ฝ",
154
  }.items()
155
  if normalized == key or module_name == key
156
  ),
 
832
  dataReadyBlock = f"๋ฐ์ดํ„ฐ ๊ฐ€์šฉ์„ฑ\n{dataReadySummary}"
833
  dynamic_part = f"{dynamic_part}\n\n{dataReadyBlock}" if dynamic_part else dataReadyBlock
834
 
835
+ # ์ด์ „ ๋ถ„์„ ๊ธฐ๋ก ์ฃผ์ž… (์„ธ์…˜ ๊ฐ„ ๋ฉ”๋ชจ๋ฆฌ)
836
+ if stock_id:
837
+ try:
838
+ from dartlab.ai.memory.store import getMemory
839
+
840
+ memoryContext = getMemory().toPromptContext(stock_id)
841
+ if memoryContext:
842
+ dynamic_part = f"{dynamic_part}\n\n{memoryContext}" if dynamic_part else memoryContext
843
+ except (ImportError, OSError, sqlite3.Error):
844
+ pass
845
+
846
  if dialogue_policy:
847
  dynamic_part = dynamic_part + "\n\n" + dialogue_policy if dynamic_part else dialogue_policy
848
 
 
900
  # ๋ชจ๋“  provider์—์„œ Super Tool ๋ชจ๋“œ ๊ธฐ๋ณธ ํ™œ์„ฑํ™” โ€” 8๊ฐœ ๋„๊ตฌ๋กœ ํ†ตํ•ฉ
901
  _useSuperTools = True
902
  effective_turns = max(max_turns, _estimate_max_turns(question, q_type or ""))
903
+
904
+ # report_mode โ†’ ์ž์œจ ํƒ์ƒ‰ ์—์ด์ „ํŠธ (Tier 2)
905
+ _agent_fn = _run_agent_autonomous if report_mode else _run_agent
906
+ _effective_max = max(effective_turns, 15) if report_mode else effective_turns
907
+
908
+ for _ev in _agent_fn(
909
  llm,
910
  messages,
911
  company,
912
  question,
913
+ max_turns=_effective_max,
914
  max_tools=max_tools,
915
  q_type=q_type,
916
  useSuperTools=_useSuperTools,
 
952
  if response_meta.get("grade") or response_meta.get("has_conclusion"):
953
  _done_payload["responseMeta"] = response_meta
954
 
955
+ # โ”€โ”€ 14.5. ๋ถ„์„ ๋ฉ”๋ชจ๋ฆฌ ์ €์žฅ โ”€โ”€
956
+ if stock_id and _full_response_parts:
957
+ try:
958
+ from dartlab.ai.memory.store import getMemory
959
+ from dartlab.ai.memory.summarizer import extractGrade, summarizeResponse
960
+
961
+ _fullText = "".join(_full_response_parts)
962
+ _mem = getMemory()
963
+ _mem.saveAnalysis(
964
+ stockCode=stock_id,
965
+ question=question[:200],
966
+ questionType=q_type or "",
967
+ resultSummary=summarizeResponse(_fullText),
968
+ grade=extractGrade(_fullText),
969
+ )
970
+ except (ImportError, OSError, sqlite3.Error):
971
+ pass
972
+
973
  # โ”€โ”€ 15. Meta ์—…๋ฐ์ดํŠธ (includedModules, yearRange) โ”€โ”€
974
  if _included_tables:
975
  includedEvidence = _build_included_evidence(_included_tables)
src/dartlab/ai/runtime/run_modes.py CHANGED
@@ -1,6 +1,6 @@
1
- """AI ๋ถ„์„ ์‹คํ–‰ ๋ชจ๋“œ โ€” light / guided_json / stream / agent.
2
 
3
- core.py์˜ _analyze_inner()์—์„œ ๋””์ŠคํŒจ์น˜ํ•˜๋Š” 4๊ฐ€์ง€ ์‹คํ–‰ ๊ฒฝ๋กœ.
4
  """
5
 
6
  from __future__ import annotations
@@ -249,3 +249,88 @@ def _run_agent(
249
  yield AnalysisEvent("chart", chart_events.pop(0))
250
  while ui_events:
251
  yield ui_events.pop(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AI ๋ถ„์„ ์‹คํ–‰ ๋ชจ๋“œ โ€” light / guided_json / stream / agent / autonomous.
2
 
3
+ core.py์˜ _analyze_inner()์—์„œ ๋””์ŠคํŒจ์น˜ํ•˜๋Š” 5๊ฐ€์ง€ ์‹คํ–‰ ๊ฒฝ๋กœ.
4
  """
5
 
6
  from __future__ import annotations
 
249
  yield AnalysisEvent("chart", chart_events.pop(0))
250
  while ui_events:
251
  yield ui_events.pop(0)
252
+
253
+
254
+ # โ”€โ”€ Autonomous agent mode (Tier 2) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
255
+
256
+
257
def _run_agent_autonomous(
    llm,
    messages: list[dict],
    company: Any,
    question: str,
    *,
    max_turns: int = 15,
    max_tools: int | None = None,
    q_type: str | None = None,
    useSuperTools: bool = True,
    _full_response_parts: list[str],
) -> Generator[AnalysisEvent, None, None]:
    """Drive the autonomous agent loop and translate its output into events.

    Builds a tool runtime, appends the agent system addition to the first
    message, then runs ``agentLoopAutonomous``. Tool calls and results seen
    through the callbacks are buffered and emitted as events before the next
    text chunk, and once more after the loop ends.

    Args:
        llm: Provider passed through to the agent loop.
        messages: Conversation; ``messages[0]["content"]`` is extended in place.
        company: Passed to the tool runtime builder and the agent loop.
        question: Accepted for signature parity; not used in this function.
        max_turns: Forwarded as ``maxTurns``.
        max_tools: Forwarded as ``maxTools``.
        q_type: Forwarded as ``questionType``.
        useSuperTools: Forwarded to ``build_tool_runtime``.
        _full_response_parts: Output list; every text chunk is appended to it.

    Yields:
        ``tool_call``, ``tool_result``, ``chart``, UI-action and ``chunk`` events.
    """
    from dartlab.ai.runtime.agent import agentLoopAutonomous, build_agent_system_addition
    from dartlab.ai.tools.registry import build_tool_runtime

    runtime = build_tool_runtime(company, name="core-autonomous", useSuperTools=useSuperTools)

    system_addition = build_agent_system_addition(runtime)
    messages[0]["content"] += system_addition

    tool_calls_log: list[dict] = []
    tool_results_log: list[dict] = []
    chart_events: list[dict] = []
    ui_events: list[AnalysisEvent] = []

    def _on_tool_call(name: str, arguments: dict) -> None:
        tool_calls_log.append({"name": name, "arguments": arguments})

    def _on_tool_result(name: str, result: str) -> None:
        tool_results_log.append({"name": name, "result": result})
        # Parse once; anything that is not a JSON object carries no chart or UI action.
        try:
            parsed = json.loads(result)
        except (json.JSONDecodeError, TypeError):
            return
        if not isinstance(parsed, dict):
            # A JSON list or scalar would otherwise raise AttributeError on .get below.
            return
        if name == "chart":
            charts = parsed.get("charts")
            if charts:
                chart_events.append({"charts": charts})
        if parsed.get("action"):
            ui_events.append(AnalysisEvent(EventKind.UI_ACTION, parsed))

    def _drain_pending() -> Generator[AnalysisEvent, None, None]:
        # Emit buffered events in a fixed order: calls, results, charts, UI actions.
        while tool_calls_log:
            yield AnalysisEvent("tool_call", tool_calls_log.pop(0))
        while tool_results_log:
            yield AnalysisEvent("tool_result", tool_results_log.pop(0))
        while chart_events:
            yield AnalysisEvent("chart", chart_events.pop(0))
        while ui_events:
            yield ui_events.pop(0)

    for chunk in agentLoopAutonomous(
        llm,
        messages,
        company,
        maxTurns=max_turns,
        maxTools=max_tools,
        runtime=runtime,
        onToolCall=_on_tool_call,
        onToolResult=_on_tool_result,
        questionType=q_type,
    ):
        yield from _drain_pending()

        _full_response_parts.append(chunk)
        yield AnalysisEvent("chunk", {"text": chunk})

    # Events buffered after the last chunk (or when no chunk was produced).
    yield from _drain_pending()
src/dartlab/ai/runtime/scratchpad.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋„๊ตฌ ๊ฒฐ๊ณผ ๋ˆ„์ /์ •๋ฆฌ ์—”์ง„ โ€” dexter scratchpad ํŒจํ„ด.
2
+
3
+ ์—์ด์ „ํŠธ ๋ฃจํ”„์—์„œ ๋„๊ตฌ ํ˜ธ์ถœ ๊ฒฐ๊ณผ๋ฅผ ๊ตฌ์กฐ์ ์œผ๋กœ ๊ด€๋ฆฌํ•œ๋‹ค:
4
+ - ๋„๊ตฌ๋ณ„ ํ˜ธ์ถœ ํšŸ์ˆ˜ ์ถ”์  + ์ค‘๋ณต ๋ฐฉ์ง€
5
+ - ํ† ํฐ ์˜ˆ์‚ฐ ์ดˆ๊ณผ ์‹œ ์˜ค๋ž˜๋œ ๊ฒฐ๊ณผ ์••์ถ•
6
+ - LLM์— ์ „๋‹ฌํ•  ์ •๋ฆฌ๋œ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from typing import Any
13
+
14
+
15
@dataclass
class _Entry:
    """Result of a single tool call, as stored by the scratchpad."""

    toolName: str  # name of the tool that produced the result
    args: dict[str, Any]  # arguments the tool was called with
    result: str  # result text (addEntry stores the pruned text here)
    tokenEstimate: int  # estimated token count of ``result``
    order: int  # sequence number assigned when the entry was added
24
+
25
+
26
@dataclass
class Scratchpad:
    """Accumulate and tidy tool results produced during the agent loop.

    Every tool result is stored as an ``_Entry``. When the estimated token
    total exceeds ``tokenBudget`` the oldest entries are first collapsed to a
    one-line summary and, only if that is still not enough, evicted.
    """

    entries: list[_Entry] = field(default_factory=list)
    callCounts: dict[str, int] = field(default_factory=dict)
    _order: int = field(default=0, repr=False)
    tokenBudget: int = 8000
    # ``order`` values of entries that were already collapsed to a summary,
    # so that a summary is never summarised a second time.
    _summarized: set[int] = field(default_factory=set, repr=False)

    # -- core API ---------------------------------------------------------

    def addEntry(self, toolName: str, args: dict[str, Any], result: str) -> None:
        """Store a tool result (pruned first) and re-balance the budget."""
        from dartlab.ai.context.pruning import pruneToolResult

        pruned = pruneToolResult(toolName, result)
        tokens = _estimateTokens(pruned)
        self._order += 1
        self.entries.append(_Entry(toolName, args, pruned, tokens, self._order))
        self.callCounts[toolName] = self.callCounts.get(toolName, 0) + 1
        self.pruneIfNeeded()

    def isDuplicateExceeded(self, toolName: str, maxCalls: int = 3) -> bool:
        """Return True when *toolName* was called at least *maxCalls* times."""
        return self.callCounts.get(toolName, 0) >= maxCalls

    def pruneIfNeeded(self) -> None:
        """Bring the stored results back under ``tokenBudget``.

        Pass 1 collapses older entries (oldest first, never the most recent
        one) to a one-line summary, each entry at most once. Pass 2 evicts
        the oldest entries while the budget is still exceeded, always keeping
        at least one entry.
        """
        # Pass 1: summarise full-size entries before dropping anything.
        for entry in self.entries[:-1]:
            if self._totalTokens() <= self.tokenBudget:
                return
            if entry.order in self._summarized:
                # Re-summarising a summary would only nest the prefix and
                # truncate away its line-count / tool-name suffix.
                continue
            entry.result = _summarizeLine(entry.toolName, entry.result)
            entry.tokenEstimate = _estimateTokens(entry.result)
            self._summarized.add(entry.order)

        # Pass 2: summaries alone were not enough, so evict from the oldest.
        while self._totalTokens() > self.tokenBudget and len(self.entries) > 1:
            dropped = self.entries.pop(0)
            self._summarized.discard(dropped.order)

    def toContext(self) -> str:
        """Render the accumulated results as markdown ("" when empty)."""
        if not self.entries:
            return ""
        parts: list[str] = []
        for e in self.entries:
            argsStr = ", ".join(f"{k}={v}" for k, v in e.args.items()) if e.args else ""
            parts.append(f"### {e.toolName}({argsStr})\n{e.result}")
        return "\n\n".join(parts)

    def getUsageSummary(self) -> str:
        """Return a text overview of per-tool call counts and token usage."""
        if not self.callCounts:
            return ""
        lines = [f"- {name}: {count}ํšŒ" for name, count in self.callCounts.items()]
        total = self._totalTokens()
        lines.append(f"- ์ปจํ…์ŠคํŠธ: ~{total} ํ† ํฐ / {self.tokenBudget} ์˜ˆ์‚ฐ")
        return "**๋„๊ตฌ ์‚ฌ์šฉ ํ˜„ํ™ฉ:**\n" + "\n".join(lines)

    def getDuplicateWarning(self, toolName: str) -> str | None:
        """Return a warning for the LLM once *toolName* is over-used, else None."""
        if not self.isDuplicateExceeded(toolName):
            return None
        count = self.callCounts.get(toolName, 0)
        return (
            f"โš ๏ธ {toolName}์„ ์ด๋ฏธ {count}ํšŒ ํ˜ธ์ถœํ–ˆ์Šต๋‹ˆ๋‹ค. "
            f"๊ฐ™์€ ๋„๊ตฌ๋ฅผ ๋ฐ˜๋ณต ํ˜ธ์ถœํ•˜์ง€ ๋ง๊ณ , ์ˆ˜์ง‘๋œ ๋ฐ์ดํ„ฐ๋กœ ๋‹ต๋ณ€์„ ์ข…ํ•ฉํ•˜์„ธ์š”."
        )

    # -- internals --------------------------------------------------------

    def _totalTokens(self) -> int:
        """Sum of the token estimates of all stored entries."""
        return sum(e.tokenEstimate for e in self.entries)
97
+
98
+
99
+ def _estimateTokens(text: str) -> int:
100
+ """๊ฐ„์ด ํ† ํฐ ์ถ”์ • โ€” ํ•œ๊ธ€ 2์ž=1ํ† ํฐ, ์˜๋ฌธ 4์ž=1ํ† ํฐ ๊ทผ์‚ฌ."""
101
+ if not text:
102
+ return 0
103
+ korean = sum(1 for c in text if "\uac00" <= c <= "\ud7a3")
104
+ other = len(text) - korean
105
+ return korean // 2 + other // 4 + 1
106
+
107
+
108
+ def _summarizeLine(toolName: str, result: str) -> str:
109
+ """๋„๊ตฌ ๊ฒฐ๊ณผ๋ฅผ 1์ค„ ์š”์•ฝ์œผ๋กœ ์••์ถ•."""
110
+ # ์ฒซ ์ค„ ๋˜๋Š” ์ฒซ 100์ž + ์ค„ ์ˆ˜ ์ •๋ณด
111
+ lines = result.strip().split("\n")
112
+ firstLine = lines[0][:100] if lines else ""
113
+ if len(lines) > 1:
114
+ return f"[์š”์•ฝ] {firstLine}... ({len(lines)}์ค„, {toolName})"
115
+ return f"[์š”์•ฝ] {firstLine}"
src/dartlab/ai/skills/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """๋ถ„์„ ์Šคํ‚ฌ โ€” ํ”„๋กฌํ”„ํŠธ ๊ธฐ๋ฐ˜ ์›Œํฌํ”Œ๋กœ์šฐ ๊ฐ€์ด๋“œ.
2
+
3
+ ๋„๊ตฌ๋ฅผ ์ง€์ •ํ•˜์ง€ ์•Š๊ณ  ๋ถ„์„ ๋ชฉํ‘œ๋งŒ ์„ ์–ธํ•œ๋‹ค.
4
+ LLM์ด ํ˜„์žฌ ๊ฐ€์šฉํ•œ ๋„๊ตฌ ์ค‘์—์„œ ์ž์œจ ์„ ํƒ.
5
+ """
6
+
7
+ from dartlab.ai.skills.registry import Skill, matchSkill
8
+
9
+ __all__ = ["Skill", "matchSkill"]
src/dartlab/ai/skills/catalog.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋ถ„์„ ์˜์—ญ๋ณ„ ์Šคํ‚ฌ ์นดํƒˆ๋กœ๊ทธ.
2
+
3
+ ๋„๊ตฌ๋ฅผ ์ง€์ •ํ•˜์ง€ ์•Š๋Š”๋‹ค โ€” ๋ถ„์„ ๋ชฉํ‘œ๋งŒ ์„ ์–ธ.
4
+ 8๋Œ€ ์˜์—ญ์ด ์•ˆ์ •ํ™”๋˜๋ฉด์„œ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํšจ๊ณผ๊ฐ€ ํ–ฅ์ƒ๋œ๋‹ค.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dartlab.ai.skills.registry import Skill
10
+
11
+ SKILLS: tuple[Skill, ...] = (
12
+ Skill(
13
+ id="profitability",
14
+ name="์ˆ˜์ต์„ฑ ์‹ฌ์ธต ๋ถ„์„",
15
+ triggerKeywords=("์ˆ˜์ต์„ฑ", "์ด์ต๋ฅ ", "๋งˆ์ง„", "ROE", "ROA", "์˜์—…์ด์ต๋ฅ "),
16
+ analysisGoals=(
17
+ "ROE๋ฅผ DuPont ๋ถ„ํ•ดํ•˜์—ฌ ์ˆ˜์ต์„ฑ/ํšจ์œจ์„ฑ/๋ ˆ๋ฒ„๋ฆฌ์ง€ ๋™์ธ ์‹๋ณ„",
18
+ "์˜์—…์ด์ต๋ฅ ๊ณผ ์›๊ฐ€์œจ ์ถ”์„ธ์—์„œ ๋น„์šฉ ๊ตฌ์กฐ ๋ณ€ํ™” ํŒŒ์•…",
19
+ "์˜์—…CF/์ˆœ์ด์ต ๋น„์œจ๋กœ ์ด์ต์˜ ์งˆ ํŒ๋‹จ",
20
+ "๋ถ€๋ฌธ๋ณ„ ์ˆ˜์ต์„ฑ ์ฐจ์ด๊ฐ€ ์žˆ์œผ๋ฉด ์„ธ๊ทธ๋จผํŠธ ๋ถ„ํ•ด",
21
+ ),
22
+ synthesisGuide="DuPont ๋ถ„ํ•ด โ†’ ์ด์ต์˜ ์งˆ โ†’ ๋น„์šฉ ๋™์ธ โ†’ ์ธ๊ณผ ๊ด€๊ณ„ ์„œ์ˆ ",
23
+ checkpoints=(
24
+ "DuPont 3์š”์†Œ ๋ถ„ํ•ด๊ฐ€ ์žˆ๋Š”๊ฐ€?",
25
+ "CF/NI ๋น„์œจ์„ ์ธ์šฉํ–ˆ๋Š”๊ฐ€?",
26
+ "๋น„์šฉ ๊ตฌ์กฐ ๋ณ€ํ™”์˜ ์›์ธ์„ ์„ค๋ช…ํ–ˆ๋Š”๊ฐ€?",
27
+ ),
28
+ ),
29
+ Skill(
30
+ id="health",
31
+ name="์žฌ๋ฌด ๊ฑด์ „์„ฑ ๋ถ„์„",
32
+ triggerKeywords=("๊ฑด์ „์„ฑ", "๋ถ€์ฑ„", "์œ ๋™์„ฑ", "์•ˆ์ •์„ฑ", "์žฌ๋ฌด๊ตฌ์กฐ", "๋ถ€์ฑ„๋น„์œจ"),
33
+ analysisGoals=(
34
+ "๋ถ€์ฑ„๋น„์œจ๊ณผ ์œ ๋™๋น„์œจ ์ถ”์„ธ๋กœ ๊ตฌ์กฐ์  ์•ˆ์ •์„ฑ ํŒ๋‹จ",
35
+ "์ด์ž๋ณด์ƒ๋ฐฐ์œจ๊ณผ ์ฐจ์ž…๊ธˆ ๋งŒ๊ธฐ ๊ตฌ์กฐ ํ™•์ธ",
36
+ "์šด์ „์ž๋ณธ ์‚ฌ์ดํด(๋งค์ถœ์ฑ„๊ถŒ+์žฌ๊ณ -๋งค์ž…์ฑ„๋ฌด) ์ถ”์ด ๋ถ„์„",
37
+ "ํ˜„๊ธˆ์„ฑ ์ž์‚ฐ ๋Œ€๋น„ ๋‹จ๊ธฐ ์˜๋ฌด ์ปค๋ฒ„๋ฆฌ์ง€ ํ™•์ธ",
38
+ ),
39
+ synthesisGuide="๋ ˆ๋ฒ„๋ฆฌ์ง€ ๊ตฌ์กฐ โ†’ ์œ ๋™์„ฑ ๊ณ„์ธต โ†’ ๋ถ€์ฑ„ ๋งŒ๊ธฐ โ†’ ์ข…ํ•ฉ ๊ฑด์ „์„ฑ ํŒ๋‹จ",
40
+ checkpoints=(
41
+ "์œ ๋™๋น„์œจ๊ณผ ๋ถ€์ฑ„๋น„์œจ ์ˆ˜์น˜๋ฅผ ์ธ์šฉํ–ˆ๋Š”๊ฐ€?",
42
+ "์ด์ž๋ณด์ƒ๋ฐฐ์œจ์„ ํ™•์ธํ–ˆ๋Š”๊ฐ€?",
43
+ "๋‹จ๊ธฐ ์œ ๋™์„ฑ ์œ„ํ—˜์„ ํ‰๊ฐ€ํ–ˆ๋Š”๊ฐ€?",
44
+ ),
45
+ ),
46
+ Skill(
47
+ id="valuation",
48
+ name="๋ฐธ๋ฅ˜์—์ด์…˜ ๋ถ„์„",
49
+ triggerKeywords=("๋ฐธ๋ฅ˜์—์ด์…˜", "์ ์ •๊ฐ€์น˜", "๋ชฉํ‘œ๊ฐ€", "์ €ํ‰๊ฐ€", "๊ณ ํ‰๊ฐ€", "PER", "PBR", "DCF"),
50
+ analysisGoals=(
51
+ "ํ•ต์‹ฌ ๋ฉ€ํ‹ฐํ”Œ(PER, PBR, EV/EBITDA) ์‚ฐ์ถœ ๋ฐ ์—…์ข… ๋น„๊ต",
52
+ "์ด์ต ์„ฑ์žฅ๋ฅ ๊ณผ ์ง€์†๊ฐ€๋Šฅ์„ฑ์„ ๊ทผ๊ฑฐ๋กœ ์ ์ • ๋ฉ€ํ‹ฐํ”Œ ๋ฒ”์œ„ ์ถ”์ •",
53
+ "๊ฐ€๋Šฅํ•˜๋ฉด DCF ๊ด€์ ์—์„œ ๋‚ด์žฌ๊ฐ€์น˜ ๋ฒ”์œ„ ์ œ์‹œ",
54
+ "์•ˆ์ „๋งˆ์ง„(ํ˜„์žฌ๊ฐ€ vs ์ ์ •๊ฐ€์น˜ ๋ฒ”์œ„) ํŒ๋‹จ",
55
+ ),
56
+ synthesisGuide="๋ฉ€ํ‹ฐํ”Œ ๋น„๊ต โ†’ ์„ฑ์žฅ๋ฅ  ๊ทผ๊ฑฐ โ†’ ์ ์ •๊ฐ€์น˜ ๋ฒ”์œ„ โ†’ ์•ˆ์ „๋งˆ์ง„ ํŒ๋‹จ",
57
+ checkpoints=(
58
+ "PER/PBR ์ˆ˜์น˜์™€ ์—…์ข… ๋น„๊ต๊ฐ€ ์žˆ๋Š”๊ฐ€?",
59
+ "์„ฑ์žฅ๋ฅ  ๊ทผ๊ฑฐ๋ฅผ ์ œ์‹œํ–ˆ๋Š”๊ฐ€?",
60
+ "์ ์ •๊ฐ€์น˜ ๋ฒ”์œ„๋ฅผ ์ œ์‹œํ–ˆ๋Š”๊ฐ€? (๋‹จ์ผ ๋ชฉํ‘œ๊ฐ€ ์•„๋‹Œ ๋ฒ”์œ„)",
61
+ ),
62
+ ),
63
+ Skill(
64
+ id="risk",
65
+ name="๋ฆฌ์Šคํฌ ๋ถ„์„",
66
+ triggerKeywords=("๋ฆฌ์Šคํฌ", "์œ„ํ—˜", "์œ„๊ธฐ", "๋ถˆํ™•์‹ค์„ฑ", "์ ์ƒ‰์‹ ํ˜ธ"),
67
+ analysisGoals=(
68
+ "์žฌ๋ฌด ๋ฆฌ์Šคํฌ: ์œ ๋™์„ฑ, ๋ ˆ๋ฒ„๋ฆฌ์ง€, ์ด์ž๋ณด์ƒ ์—ญ๋Ÿ‰",
69
+ "์‚ฌ์—… ๋ฆฌ์Šคํฌ: ๋งค์ถœ์ฒ˜ ์ง‘์ค‘, ๊ณต๊ธ‰๋ง ์˜์กด, ๊ทœ์ œ ๋ณ€ํ™”",
70
+ "ํšŒ๊ณ„ ๋ฆฌ์Šคํฌ: ๊ฐ์‚ฌ์˜๊ฒฌ ๋ณ€ํ™”, ํŠน์ˆ˜๊ด€๊ณ„์ž ๊ฑฐ๋ž˜, ํšŒ๊ณ„์ •์ฑ… ๋ณ€๊ฒฝ",
71
+ "๊ณต์‹œ์—์„œ ๊ฒฝ์˜์ง„์ด ์ง์ ‘ ์–ธ๊ธ‰ํ•œ ๋ฆฌ์Šคํฌ ์š”์ธ ํ™•์ธ",
72
+ ),
73
+ synthesisGuide="์žฌ๋ฌด ๋ฆฌ์Šคํฌ โ†’ ์‚ฌ์—… ๋ฆฌ์Šคํฌ โ†’ ํšŒ๊ณ„ ๋ฆฌ์Šคํฌ โ†’ ์ข…ํ•ฉ ์œ„ํ—˜๋„ ํŒ๋‹จ",
74
+ checkpoints=(
75
+ "์ ์ƒ‰ ์‹ ํ˜ธ ์ฒดํฌ๋ฆฌ์ŠคํŠธ๋ฅผ ์ ์šฉํ–ˆ๋Š”๊ฐ€?",
76
+ "๊ณต์‹œ ์›๋ฌธ์—์„œ ๋ฆฌ์Šคํฌ ๊ด€๋ จ ์„œ์ˆ ์„ ์ธ์šฉํ–ˆ๋Š”๊ฐ€?",
77
+ ),
78
+ ),
79
+ Skill(
80
+ id="strategy",
81
+ name="์‚ฌ์—… ์ „๋žต ๋ถ„์„",
82
+ triggerKeywords=("์‚ฌ์—…", "์ „๋žต", "๊ฒฝ์Ÿ์šฐ์œ„", "๋น„์ฆˆ๋‹ˆ์Šค๋ชจ๋ธ", "์‚ฌ์—…๊ตฌ์กฐ", "์‚ฌ์—…๊ฐœ์š”"),
83
+ analysisGoals=(
84
+ "์‚ฌ์—… ๊ตฌ์กฐ: ๋ถ€๋ฌธ๋ณ„ ๋งค์ถœ ๋น„์ค‘๊ณผ ์ˆ˜์ต์„ฑ ์ฐจ์ด",
85
+ "๊ฒฝ์Ÿ ์šฐ์œ„: R&D ํˆฌ์ž ๊ฐ•๋„, ๋งˆ์ง„ ํ”„๋ฆฌ๋ฏธ์—„, ๊ณ ๊ฐ ์ง‘์ค‘๋„",
86
+ "์„ฑ์žฅ ์ „๋žต: ์œ ๊ธฐ์  ์„ฑ์žฅ vs ์ธ์ˆ˜, CAPEX ๋ฐฉํ–ฅ",
87
+ "๊ณต์‹œ ์›๋ฌธ์—์„œ ๊ฒฝ์˜์ง„์˜ ์ „๋žต ์„œ์ˆ  ํ™•์ธ",
88
+ ),
89
+ synthesisGuide="์‚ฌ์—… ๊ตฌ์กฐ ๋ถ„ํ•ด โ†’ ๊ฒฝ์Ÿ ์šฐ์œ„ ์‹๋ณ„ โ†’ ์„ฑ์žฅ ์ „๋žต ํ‰๊ฐ€ โ†’ ์ง€์†๊ฐ€๋Šฅ์„ฑ ํŒ๋‹จ",
90
+ checkpoints=(
91
+ "๋ถ€๋ฌธ๋ณ„ ๋งค์ถœ/์ด์ต ๋น„์ค‘์„ ๋ถ„ํ•ดํ–ˆ๋Š”๊ฐ€?",
92
+ "R&D/CAPEX ํˆฌ์ž ๋ฐฉํ–ฅ์„ ํ™•์ธํ–ˆ๋Š”๊ฐ€?",
93
+ ),
94
+ ),
95
+ Skill(
96
+ id="accounting",
97
+ name="ํšŒ๊ณ„ ํ’ˆ์งˆ ๋ถ„์„",
98
+ triggerKeywords=("ํšŒ๊ณ„", "๊ฐ์‚ฌ", "๋ถ„์‹", "์ด์ต์˜์งˆ", "๋ฐœ์ƒ์ฃผ์˜", "ํšŒ๊ณ„์ •์ฑ…"),
99
+ analysisGoals=(
100
+ "Accrual Ratio ๊ณ„์‚ฐ: (์ˆœ์ด์ต-์˜์—…CF)/ํ‰๊ท ์ž์‚ฐ โ€” 10% ์ดˆ๊ณผ ์‹œ ์˜์‹ฌ",
101
+ "๊ฐ์‚ฌ์˜๊ฒฌ ๋ณ€ํ™”์™€ ๊ฐ์‚ฌ์ธ ๊ต์ฒด ์ด๋ ฅ ํ™•์ธ",
102
+ "ํšŒ๊ณ„์ •์ฑ… ๋ณ€๊ฒฝ(์ˆ˜์ต์ธ์‹, ์ž๋ณธํ™”, ๊ฐ๊ฐ€์ƒ๊ฐ) ์˜ํ–ฅ ํŒŒ์•…",
103
+ "๋งค์ถœ์ฑ„๊ถŒ/์žฌ๊ณ  ์ฆ๊ฐ€์œจ๊ณผ ๋งค์ถœ/์›๊ฐ€ ์ฆ๊ฐ€์œจ ๋น„๊ต",
104
+ ),
105
+ synthesisGuide="Accrual Ratio โ†’ ๊ฐ์‚ฌ ์ด๋ ฅ โ†’ ํšŒ๊ณ„์ •์ฑ… ๋ณ€๊ฒฝ โ†’ ์ด์ต์˜ ์งˆ ์ข…ํ•ฉ",
106
+ checkpoints=(
107
+ "CF/NI ๋น„์œจ ๋˜๋Š” Accrual Ratio๋ฅผ ๊ณ„์‚ฐํ–ˆ๋Š”๊ฐ€?",
108
+ "๊ฐ์‚ฌ์˜๊ฒฌ์„ ํ™•์ธํ–ˆ๋Š”๊ฐ€?",
109
+ ),
110
+ ),
111
+ Skill(
112
+ id="dividend",
113
+ name="๋ฐฐ๋‹น ๋ถ„์„",
114
+ triggerKeywords=("๋ฐฐ๋‹น", "๋ฐฐ๋‹น๊ธˆ", "๋ฐฐ๋‹น๋ฅ ", "๋ฐฐ๋‹น์„ฑํ–ฅ", "์ฃผ์ฃผํ™˜์›"),
115
+ analysisGoals=(
116
+ "๋ฐฐ๋‹น ์ถ”์ด: ๋ฐฐ๋‹น๊ธˆ, ๋ฐฐ๋‹น์ˆ˜์ต๋ฅ , ๋ฐฐ๋‹น์„ฑํ–ฅ 3~5๋…„ ์‹œ๊ณ„์—ด",
117
+ "๋ฐฐ๋‹น ์ง€์†๊ฐ€๋Šฅ์„ฑ: FCF ๋Œ€๋น„ ๋ฐฐ๋‹น๊ธˆ, ์ด์ต ์•ˆ์ •์„ฑ",
118
+ "์ฃผ์ฃผํ™˜์› ์ •์ฑ…: ์ž์‚ฌ์ฃผ ๋งค์ž…, ์†Œ๊ฐ ์ด๋ ฅ ํ™•์ธ",
119
+ "๋™์ข…์—…์ข… ๋ฐฐ๋‹น ์ˆ˜์ค€ ๋น„๊ต (๊ฐ€๋Šฅ ์‹œ)",
120
+ ),
121
+ synthesisGuide="๋ฐฐ๋‹น ์ถ”์ด โ†’ ์ง€์†๊ฐ€๋Šฅ์„ฑ(FCF) โ†’ ์ฃผ์ฃผํ™˜์› ์ •์ฑ… โ†’ ๋งค๋ ฅ๋„ ํŒ๋‹จ",
122
+ checkpoints=(
123
+ "๋ฐฐ๋‹น์„ฑํ–ฅ๊ณผ ๋ฐฐ๋‹น์ˆ˜์ต๋ฅ  ์ˆ˜์น˜๋ฅผ ์ธ์šฉํ–ˆ๋Š”๊ฐ€?",
124
+ "FCF ๋Œ€๋น„ ๋ฐฐ๋‹น ์ปค๋ฒ„๋ฆฌ์ง€๋ฅผ ํ™•์ธํ–ˆ๋Š”๊ฐ€?",
125
+ ),
126
+ ),
127
+ Skill(
128
+ id="comprehensive",
129
+ name="์ข…ํ•ฉ ๋ถ„์„",
130
+ triggerKeywords=("์ข…ํ•ฉ", "์ „๋ฐ˜", "์ „์ฒด", "์ดํ‰", "๋ถ„์„ํ•ด์ค˜", "์–ด๋•Œ"),
131
+ analysisGoals=(
132
+ "์‚ฌ์—… ๊ตฌ์กฐ์™€ ๊ฒฝ์Ÿ ํฌ์ง€์…”๋‹ ํŒŒ์•…",
133
+ "ํ•ต์‹ฌ ์žฌ๋ฌด ์ง€ํ‘œ(์ˆ˜์ต์„ฑ, ๊ฑด์ „์„ฑ, ์„ฑ์žฅ์„ฑ) 3~5๋…„ ์ถ”์„ธ",
134
+ "์ด์ต์˜ ์งˆ๊ณผ ํ˜„๊ธˆํ๋ฆ„ ํ”„๋กœํŒŒ์ผ",
135
+ "์ ์ƒ‰ ์‹ ํ˜ธ ์ฒดํฌ ๋ฐ ๋ฆฌ์Šคํฌ ์š”์ธ ์‹๋ณ„",
136
+ "๊ฐ•์ /์•ฝ์  ์ •๋ฆฌ์™€ Bull/Bear ๋…ผ๊ฑฐ",
137
+ ),
138
+ synthesisGuide="์‚ฌ์—… ๊ตฌ์กฐ โ†’ ์žฌ๋ฌด ์ถ”์„ธ โ†’ ์ด์ต์˜ ์งˆ โ†’ ๋ฆฌ์Šคํฌ โ†’ ๊ฐ•์ /์•ฝ์  โ†’ ์ข…ํ•ฉ ํŒ๋‹จ",
139
+ checkpoints=(
140
+ "์ตœ์†Œ 3๊ฐœ ์ด์ƒ์˜ ์žฌ๋ฌด ๋น„์œจ์„ ์ธ์šฉํ–ˆ๋Š”๊ฐ€?",
141
+ "๊ฐ•์ ๊ณผ ์•ฝ์ ์„ ๊ท ํ˜• ์žˆ๊ฒŒ ์ œ์‹œํ–ˆ๋Š”๊ฐ€?",
142
+ "Bull/Bear ๋…ผ๊ฑฐ๋ฅผ ์ œ์‹œํ–ˆ๋Š”๊ฐ€?",
143
+ ),
144
+ ),
145
+ )
src/dartlab/ai/skills/registry.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์Šคํ‚ฌ ๋ ˆ์ง€์ŠคํŠธ๋ฆฌ โ€” ๋ถ„์„ ๋ชฉํ‘œ ๊ธฐ๋ฐ˜ ์›Œํฌํ”Œ๋กœ์šฐ ๋งค์นญ.
2
+
3
+ Skill์€ ๋„๊ตฌ๋ฅผ ์ง€์ •ํ•˜์ง€ ์•Š๋Š”๋‹ค.
4
+ ๋ถ„์„ ๋ชฉํ‘œ(analysisGoals)์™€ ์ข…ํ•ฉ ๊ฐ€์ด๋“œ(synthesisGuide)๋งŒ ์„ ์–ธํ•˜๊ณ ,
5
+ LLM์ด ํ˜„์žฌ ๊ฐ€์šฉํ•œ ๋„๊ตฌ ์ค‘์—์„œ ์ž์œจ ์„ ํƒํ•œ๋‹ค.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+
12
+
13
@dataclass(frozen=True)
class Skill:
    """Definition of one analysis workflow."""

    id: str
    name: str
    triggerKeywords: tuple[str, ...]
    analysisGoals: tuple[str, ...]
    synthesisGuide: str
    checkpoints: tuple[str, ...] = field(default_factory=tuple)

    def toPrompt(self) -> str:
        """Build the natural-language guide injected into the system prompt."""
        numbered = [f" {idx}. {goal}" for idx, goal in enumerate(self.analysisGoals, start=1)]
        pieces = [
            f"## ๋ถ„์„ ์Šคํ‚ฌ: {self.name}",
            "\n\n**๋ถ„์„ ๋ชฉํ‘œ:**\n",
            "\n".join(numbered),
            f"\n\n**์ข…ํ•ฉ ํ”„๋ ˆ์ž„:** {self.synthesisGuide}",
        ]
        if self.checkpoints:
            # The self-check section is appended only when checkpoints exist.
            pieces.append("\n**์ž์ฒด ๊ฒ€์ฆ:**\n")
            pieces.append("\n".join(f" - {item}" for item in self.checkpoints))
        return "".join(pieces)
31
+
32
+
33
def matchSkill(
    question: str,
    questionType: str | None = None,
) -> Skill | None:
    """Pick the skill that best fits the question.

    A skill whose trigger keywords contain *questionType* wins outright.
    Otherwise the skill with the most trigger keywords occurring in
    *question* is returned (the earliest catalog entry wins ties), or None
    when nothing matches.
    """
    from dartlab.ai.skills.catalog import SKILLS

    # Stage 1: direct match on the question type.
    if questionType:
        direct = next((s for s in SKILLS if questionType in s.triggerKeywords), None)
        if direct is not None:
            return direct

    # Stage 2: keyword hits in the question text.
    if not question:
        return None

    scored = [(sum(kw in question for kw in s.triggerKeywords), s) for s in SKILLS]
    # max() returns the first maximal item, which keeps catalog order on ties.
    topScore, topSkill = max(scored, key=lambda pair: pair[0], default=(0, None))
    return topSkill if topScore > 0 else None
src/dartlab/ai/tools/defaults/helpers.py CHANGED
@@ -21,8 +21,11 @@ def df_to_md(df: pl.DataFrame, max_rows: int = 15, max_chars: int = 0, market: s
21
 
22
 
23
  def json_to_text(value: Any, max_chars: int = 4000) -> str:
24
- """dict/list/json ์ง๋ ฌํ™”."""
25
- text = json.dumps(value, ensure_ascii=False, indent=2, default=str)
 
 
 
26
  if len(text) <= max_chars:
27
  return text
28
  return text[:max_chars] + "\n... (truncated)"
 
21
 
22
 
23
def json_to_text(value: Any, max_chars: int = 4000) -> str:
    """Serialise a dict/list/JSON-like value to text after pruning it.

    Output longer than *max_chars* is cut and marked as truncated.
    """
    from dartlab.ai.context.pruning import _STRIP_FIELDS, _pruneValue

    serialised = json.dumps(
        _pruneValue(value, _STRIP_FIELDS, depth=0),
        ensure_ascii=False,
        indent=2,
        default=str,
    )
    if len(serialised) > max_chars:
        return serialised[:max_chars] + "\n... (truncated)"
    return serialised
src/dartlab/cli/commands/chat.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """`dartlab chat` command -- ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ํ„ฐ๋ฏธ๋„ REPL."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from dartlab.cli.context import PROVIDERS
11
+ from dartlab.cli.services.errors import CLIError
12
+ from dartlab.cli.services.providers import detect_provider
13
+ from dartlab.cli.services.runtime import configure_dartlab
14
+
15
+
16
def configure_parser(subparsers) -> None:
    """Register the ``chat`` sub-command and its options on *subparsers*."""
    chat = subparsers.add_parser("chat", help="๋Œ€ํ™”ํ˜• AI ๋ถ„์„ (์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ REPL)")
    chat.add_argument("company", nargs="?", default=None, help="์ข…๋ชฉ์ฝ”๋“œ ๋˜๋Š” ํšŒ์‚ฌ๋ช… (์ƒ๋žต ๊ฐ€๋Šฅ)")

    # Optional flags, registered in the order they appear in --help.
    optionTable = (
        (("--provider", "-p"), {"default": None, "choices": PROVIDERS, "help": "LLM provider"}),
        (("--model", "-m"), {"default": None, "help": "๋ชจ๋ธ๋ช…"}),
        (("--base-url",), {"default": None, "help": "์ปค์Šคํ…€ API URL"}),
        (("--api-key",), {"default": None, "help": "API ํ‚ค"}),
        (("--continue",), {"dest": "cont", "action": "store_true", "help": "์ด์ „ ๋Œ€ํ™” ์ด์–ด๊ฐ€๊ธฐ"}),
    )
    for flags, options in optionTable:
        chat.add_argument(*flags, **options)

    chat.set_defaults(handler=run)
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # State
29
+ # ---------------------------------------------------------------------------
30
+
31
@dataclass
class _ChatState:
    """Mutable state of one REPL session."""

    company: Any | None = None  # currently loaded company object, if any
    stockCode: str | None = None  # stock code of the loaded company
    provider: str | None = None  # LLM provider name
    model: str | None = None  # model name; None means the provider default
    baseUrl: str | None = None  # custom API URL passed to analyze()
    apiKey: str | None = None  # API key passed to analyze()
    sessionId: int | None = None  # persisted session id, created on first save
    history: list[dict[str, str]] = field(default_factory=list)  # role/content messages
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Entry
47
+ # ---------------------------------------------------------------------------
48
+
49
def run(args) -> int:
    """Entry point of ``dartlab chat``: set up the session and start the REPL.

    Raises:
        CLIError: when a company was given on the command line but cannot
            be loaded.
    """
    from rich.console import Console

    configure_dartlab()
    console = Console()

    state = _ChatState(
        provider=args.provider or detect_provider(),
        model=args.model,
        baseUrl=args.base_url,
        apiKey=args.api_key,
    )

    requested = args.company
    if requested and not _loadCompany(state, requested, console):
        raise CLIError(f"์ข…๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {args.company}")

    # Resuming only makes sense once a company (and thus a stock code) is known.
    if args.cont and state.stockCode:
        _resumeSession(state, console)

    _printWelcome(state, console)
    _replLoop(state, console)
    return 0
73
+
74
+
75
+ # ---------------------------------------------------------------------------
76
+ # REPL loop
77
+ # ---------------------------------------------------------------------------
78
+
79
# Slash commands offered by the prompt completer built in _makePromptFn.
_SLASH_WORDS = ["/help", "/company", "/model", "/clear", "/suggest", "/status", "/quit", "/exit", "/q"]
80
+
81
+
82
def _replLoop(state: _ChatState, console) -> None:
    """Read user input until EOF or a quit command, dispatching each line."""
    readLine = _makePromptFn()

    while True:
        promptText = _buildPrompt(state)
        try:
            rawLine = readLine(promptText)
        except KeyboardInterrupt:
            # Ctrl+C cancels the current input line only.
            continue
        except EOFError:
            console.print("\n[dim]์ฑ„ํŒ…์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค.[/]")
            break

        line = rawLine.strip()
        if not line:
            continue

        if not line.startswith("/"):
            # Without a loaded company, try to detect one from the text first.
            if state.company is None:
                _tryAutoDetect(line, state, console)
            _executeQuery(line, state, console)
            continue

        if _handleSlash(line, state, console):
            break
110
+
111
+
112
+ def _makePromptFn():
113
+ """prompt_toolkit PromptSession์„ ๋ฐ˜ํ™˜. ํ„ฐ๋ฏธ๋„์ด ์•„๋‹ˆ๋ฉด input() fallback."""
114
+ try:
115
+ import sys
116
+
117
+ if not sys.stdin.isatty():
118
+ return input
119
+
120
+ from prompt_toolkit import PromptSession
121
+ from prompt_toolkit.completion import WordCompleter
122
+ from prompt_toolkit.history import FileHistory
123
+
124
+ historyDir = Path.home() / ".dartlab"
125
+ historyDir.mkdir(parents=True, exist_ok=True)
126
+ historyFile = historyDir / "chat.history"
127
+
128
+ completer = WordCompleter(_SLASH_WORDS, sentence=True)
129
+ session = PromptSession(
130
+ history=FileHistory(str(historyFile)),
131
+ completer=completer,
132
+ )
133
+ return session.prompt
134
+ except (ImportError, RuntimeError, OSError):
135
+ return input
136
+
137
+
138
+ def _buildPrompt(state: _ChatState) -> str:
139
+ if state.company:
140
+ return f"\ndartlab {state.company.corpName} > "
141
+ return "\ndartlab > "
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # Query execution
146
+ # ---------------------------------------------------------------------------
147
+
148
def _executeQuery(question: str, state: _ChatState, console) -> None:
    """Run one question through ``analyze`` and stream the answer to the console.

    Text chunks are rendered live as markdown, tool calls and results are shown
    as transient status lines, and an ``error`` event aborts the query. When any
    answer text was produced, the exchange is appended to ``state.history`` and
    persisted via ``_saveMessage``.
    """
    from rich.live import Live
    from rich.markdown import Markdown
    from rich.text import Text

    from dartlab.ai.runtime.core import analyze

    events = analyze(
        state.company,
        question,
        provider=state.provider,
        model=state.model,
        base_url=state.baseUrl,
        api_key=state.apiKey,
        use_tools=True,
        # An empty history is passed as None rather than as an empty list.
        history=state.history if state.history else None,
    )

    buffer = ""
    toolStartTime: float | None = None
    toolPanels: list[str] = []  # accumulated tool result texts, rendered after the stream
    queryStart = time.monotonic()

    try:
        with Live(console=console, refresh_per_second=8, vertical_overflow="visible") as live:
            for ev in events:
                if ev.kind == "chunk":
                    buffer += ev.data["text"]
                    live.update(Markdown(buffer))
                elif ev.kind == "tool_call":
                    toolName = ev.data.get("name", "")
                    label = _toolLabel(toolName)
                    # Remember when the tool started so the result can show its duration.
                    toolStartTime = time.monotonic()
                    live.update(Markdown(buffer + f"\n\n> {label} ์กฐํšŒ ์ค‘..."))
                elif ev.kind == "tool_result":
                    toolName = ev.data.get("name", "")
                    label = _toolLabel(toolName)
                    elapsed = ""
                    if toolStartTime is not None:
                        dt = time.monotonic() - toolStartTime
                        elapsed = f" ({dt:.1f}s)"
                        toolStartTime = None
                    # Collect the tool result data.
                    resultText = ev.data.get("result", "")
                    preview = _toolResultPreview(resultText)
                    statusLine = f"> {label} ์™„๋ฃŒ{elapsed}"
                    if preview:
                        statusLine += f" -- {preview}"
                    toolPanels.append(resultText)
                    live.update(Markdown(buffer + f"\n\n{statusLine}"))
                elif ev.kind == "error":
                    errorMsg = ev.data.get("error", "์•Œ ์ˆ˜ ์—†๋Š” ์˜ค๋ฅ˜")
                    console.print(f"\n [red]{errorMsg}[/]")
                    # Abort: nothing is rendered or saved after an error event.
                    return
    except KeyboardInterrupt:
        # Ctrl+C stops the stream; whatever was received so far is still handled below.
        console.print("\n [dim]์‘๋‹ต ์ค‘๋‹จ[/]")

    # Show the collected tool results inline (only those containing tables are rendered).
    if toolPanels:
        console.print()
        for panel in toolPanels:
            _renderToolData(panel, console)

    console.print()

    # Closing summary: total elapsed time.
    totalElapsed = time.monotonic() - queryStart
    console.print(Text(f" {totalElapsed:.1f}s", style="dim"))

    if buffer:
        state.history.append({"role": "user", "content": question})
        state.history.append({"role": "assistant", "content": buffer})
        _saveMessage(state, "user", question)
        _saveMessage(state, "assistant", buffer)
222
+
223
+
224
+ def _toolResultPreview(resultText: str) -> str:
225
+ """tool ๊ฒฐ๊ณผ ํ…์ŠคํŠธ์—์„œ ํ•œ ์ค„ ์š”์•ฝ์„ ์ถ”์ถœํ•œ๋‹ค."""
226
+ if not resultText or resultText.startswith("[์˜ค๋ฅ˜]"):
227
+ return ""
228
+ lines = resultText.strip().splitlines()
229
+ # markdown ํ…Œ์ด๋ธ”์ด ์žˆ์œผ๋ฉด ํ–‰ ์ˆ˜ ํ‘œ์‹œ
230
+ tableRows = [ln for ln in lines if ln.startswith("|") and "---" not in ln]
231
+ if len(tableRows) > 1:
232
+ return f"{len(tableRows) - 1}ํ–‰" # ํ—ค๋” ์ œ์™ธ
233
+ # ์ผ๋ฐ˜ ํ…์ŠคํŠธ๋ฉด ์ฒซ ์ค„ ์•ž๋ถ€๋ถ„
234
+ firstLine = lines[0].strip().lstrip("#").strip() if lines else ""
235
+ if len(firstLine) > 60:
236
+ firstLine = firstLine[:57] + "..."
237
+ return firstLine
238
+
239
+
240
def _renderToolData(resultText: str, console) -> None:
    """Render a tool result with Rich when it contains a markdown table.

    Results without any table line are not printed. Long results are cut to
    their first 30 lines with a note of how many lines were omitted.
    """
    from rich.markdown import Markdown
    from rich.panel import Panel

    body = resultText.strip()
    rows = body.splitlines()
    if not any(row.startswith("|") for row in rows):
        return

    if len(rows) > 30:
        body = "\n".join(rows[:30]) + f"\n\n... (+{len(rows) - 30}์ค„)"
    console.print(Panel(Markdown(body), border_style="dim", padding=(0, 1)))
255
+
256
+
257
+ _TOOL_LABELS = {
258
+ "explore": "๊ณต์‹œ ํƒ์ƒ‰",
259
+ "finance": "์žฌ๋ฌด ๋ฐ์ดํ„ฐ",
260
+ "analyze": "๋ถ„์„ ์—”์ง„",
261
+ "market": "์‹œ์žฅ ๋ฐ์ดํ„ฐ",
262
+ "openapi": "OpenDART API",
263
+ "system": "์‹œ์Šคํ…œ ์ •๋ณด",
264
+ "chart": "์ฐจํŠธ ์ƒ์„ฑ",
265
+ }
266
+
267
+
268
+ def _toolLabel(toolName: str) -> str:
269
+ return _TOOL_LABELS.get(toolName, toolName)
270
+
271
+
272
+ # ---------------------------------------------------------------------------
273
+ # Company management
274
+ # ---------------------------------------------------------------------------
275
+
276
def _loadCompany(state: _ChatState, identifier: str, console) -> bool:
    """Load the company named by *identifier* into *state*.

    ``dartlab.Company`` is tried first; if it raises, the identifier is
    resolved from free text instead.

    Returns:
        True when a company was loaded. False when none was found, in which
        case the state holds no company at all.
    """
    import dartlab

    # Release the previous company before loading the next one so it can be
    # garbage-collected. The stock code is cleared together with it: leaving
    # it behind would keep naming a company that is no longer loaded after a
    # failed lookup, and new sessions would be created under that stale code.
    state.company = None
    state.stockCode = None

    try:
        company = dartlab.Company(identifier)
    except (ValueError, FileNotFoundError, OSError, RuntimeError):
        from dartlab.core.resolve import resolve_from_text

        company, _ = resolve_from_text(identifier)

    if company is None:
        console.print(f" [red]์ข…๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {identifier}[/]")
        return False

    state.company = company
    state.stockCode = company.stockCode
    console.print(f" [bold]{company.corpName}[/] ({company.stockCode}) ๋กœ๋“œ ์™„๋ฃŒ")
    return True
296
+
297
+
298
def _tryAutoDetect(userInput: str, state: _ChatState, console) -> None:
    """Try to resolve a company from the user's text and store it in *state*."""
    from dartlab.core.resolve import resolve_from_text

    detected, _ = resolve_from_text(userInput)
    if detected is None:
        return

    state.company = detected
    state.stockCode = detected.stockCode
    console.print(f" [dim]{detected.corpName} ({detected.stockCode}) ์ž๋™ ๊ฐ์ง€[/]")
306
+
307
+
308
+ # ---------------------------------------------------------------------------
309
+ # Slash commands
310
+ # ---------------------------------------------------------------------------
311
+
312
+ def _handleSlash(userInput: str, state: _ChatState, console) -> bool:
313
+ parts = userInput.split(maxsplit=1)
314
+ cmd = parts[0].lower()
315
+ arg = parts[1].strip() if len(parts) > 1 else ""
316
+
317
+ if cmd in ("/quit", "/exit", "/q"):
318
+ console.print("[dim]์ฑ„ํŒ…์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค.[/]")
319
+ return True
320
+
321
+ handlers = {
322
+ "/help": _cmdHelp,
323
+ "/company": _cmdCompany,
324
+ "/model": _cmdModel,
325
+ "/clear": _cmdClear,
326
+ "/suggest": _cmdSuggest,
327
+ "/status": _cmdStatus,
328
+ }
329
+
330
+ handler = handlers.get(cmd)
331
+ if handler:
332
+ handler(arg, state, console)
333
+ else:
334
+ console.print(f" [yellow]์•Œ ์ˆ˜ ์—†๋Š” ๋ช…๋ น: {cmd}[/] /help ๋กœ ์‚ฌ์šฉ๋ฒ• ํ™•์ธ")
335
+
336
+ return False
337
+
338
+
339
def _cmdHelp(_arg: str, _state: _ChatState, console) -> None:
    """Print the list of slash commands; the argument and state are unused."""
    console.print("""
[bold]๋ช…๋ น์–ด[/]
/help ์ด ๋„์›€๋ง
/company <์ด๋ฆ„/์ฝ”๋“œ> ์ข…๋ชฉ ๋ณ€๊ฒฝ
/model <์ด๋ฆ„> ๋ชจ๋ธ/provider ๋ณ€๊ฒฝ
/clear ๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™”
/suggest ์ถ”์ฒœ ์งˆ๋ฌธ
/status ํ˜„์žฌ ์„ค์ •
/quit ์ข…๋ฃŒ
""")
350
+
351
+
352
+ def _cmdCompany(arg: str, state: _ChatState, console) -> None:
353
+ if not arg:
354
+ if state.company:
355
+ console.print(f" ํ˜„์žฌ: [bold]{state.company.corpName}[/] ({state.stockCode})")
356
+ else:
357
+ console.print(" [dim]๋กœ๋“œ๋œ ์ข…๋ชฉ์ด ์—†์Šต๋‹ˆ๋‹ค. /company ์‚ผ์„ฑ์ „์ž[/]")
358
+ return
359
+
360
+ hadCompany = state.company is not None
361
+ if _loadCompany(state, arg, console):
362
+ if hadCompany:
363
+ state.history.clear()
364
+ state.sessionId = None
365
+ console.print(" [dim]์ข…๋ชฉ ๋ณ€๊ฒฝ์œผ๋กœ ๋Œ€ํ™” ๊ธฐ๋ก์ด ์ดˆ๊ธฐํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.[/]")
366
+
367
+
368
+ def _cmdModel(arg: str, state: _ChatState, console) -> None:
369
+ if not arg:
370
+ console.print(f" provider: [bold]{state.provider}[/]")
371
+ console.print(f" model: {state.model or '(๊ธฐ๋ณธ๊ฐ’)'}")
372
+ return
373
+
374
+ if arg in PROVIDERS:
375
+ state.provider = arg
376
+ state.model = None
377
+ console.print(f" provider -> [bold]{arg}[/]")
378
+ else:
379
+ state.model = arg
380
+ console.print(f" model -> [bold]{arg}[/]")
381
+
382
+
383
+ def _cmdClear(_arg: str, state: _ChatState, console) -> None:
384
+ state.history.clear()
385
+ state.sessionId = None
386
+ console.print(" [dim]๋Œ€ํ™” ๊ธฐ๋ก์ด ์ดˆ๊ธฐํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.[/]")
387
+
388
+
389
+ def _cmdSuggest(_arg: str, state: _ChatState, console) -> None:
390
+ if state.company is None:
391
+ console.print(" [dim]์ข…๋ชฉ์„ ๋จผ์ € ๋กœ๋“œํ•˜์„ธ์š”. /company ์‚ผ์„ฑ์ „์ž[/]")
392
+ return
393
+
394
+ from dartlab.ai.conversation.suggestions import suggestQuestions
395
+
396
+ questions = suggestQuestions(state.company)
397
+ for i, q in enumerate(questions, 1):
398
+ console.print(f" [cyan]{i}.[/] {q}")
399
+
400
+
401
+ def _cmdStatus(_arg: str, state: _ChatState, console) -> None:
402
+ console.print(f" provider: [bold]{state.provider}[/]")
403
+ console.print(f" model: {state.model or '(๊ธฐ๋ณธ๊ฐ’)'}")
404
+ if state.company:
405
+ console.print(f" company: [bold]{state.company.corpName}[/] ({state.stockCode})")
406
+ else:
407
+ console.print(" company: (์—†์Œ)")
408
+ console.print(f" history: {len(state.history)}๊ฐœ ๋ฉ”์‹œ์ง€")
409
+
410
+
411
+ # ---------------------------------------------------------------------------
412
+ # Session persistence
413
+ # ---------------------------------------------------------------------------
414
+
415
+ def _saveMessage(state: _ChatState, role: str, content: str) -> None:
416
+ try:
417
+ from dartlab.cli.services.history import add_message, create_session
418
+
419
+ if state.sessionId is None:
420
+ stockCode = state.stockCode or "__no_company__"
421
+ state.sessionId = create_session(stockCode)
422
+ add_message(state.sessionId, role, content)
423
+ except (OSError, ImportError):
424
+ pass
425
+
426
+
427
+ def _resumeSession(state: _ChatState, console) -> None:
428
+ try:
429
+ from dartlab.cli.services.history import get_latest_session, get_messages
430
+
431
+ sessionId = get_latest_session(state.stockCode)
432
+ if sessionId:
433
+ state.sessionId = sessionId
434
+ state.history = get_messages(sessionId)
435
+ console.print(f" [dim]์ด์ „ ๋Œ€ํ™” ์ด์–ด๊ฐ€๊ธฐ (๋ฉ”๏ฟฝ๏ฟฝ์ง€ {len(state.history)}๊ฐœ)[/]")
436
+ except (OSError, ImportError):
437
+ pass
438
+
439
+
440
+ # ---------------------------------------------------------------------------
441
+ # Welcome
442
+ # ---------------------------------------------------------------------------
443
+
444
+ def _printWelcome(state: _ChatState, console) -> None:
445
+ console.print()
446
+ console.print(" [bold cyan]DartLab Chat[/] -- ๋Œ€ํ™”ํ˜• AI ๊ธฐ์—… ๋ถ„์„")
447
+ providerLine = f" [dim]provider: {state.provider}"
448
+ if state.model:
449
+ providerLine += f" / {state.model}"
450
+ providerLine += "[/]"
451
+ console.print(providerLine)
452
+ console.print()
453
+
454
+ if state.company:
455
+ console.print(f" [bold]{state.company.corpName}[/] ({state.stockCode})")
456
+ try:
457
+ from dartlab.ai.conversation.suggestions import suggestQuestions
458
+
459
+ questions = suggestQuestions(state.company)
460
+ if questions:
461
+ console.print()
462
+ console.print(" [dim]์ถ”์ฒœ ์งˆ๋ฌธ:[/]")
463
+ for q in questions[:4]:
464
+ console.print(f" [dim]-[/] {q}")
465
+ except (ImportError, AttributeError):
466
+ pass
467
+ else:
468
+ console.print(" [dim]์ข…๋ชฉ ์—†์ด ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. ์งˆ๋ฌธ์— ์ข…๋ชฉ๋ช…์„ ํฌํ•จํ•˜๊ฑฐ๋‚˜ /company ๋ช…๋ น์„ ์‚ฌ์šฉํ•˜์„ธ์š”.[/]")
469
+
470
+ console.print()
471
+ console.print(" [dim]/help ์‚ฌ์šฉ๋ฒ• | /quit ์ข…๋ฃŒ | Ctrl+C ์ž…๋ ฅ ์ทจ์†Œ[/]")
472
+ console.print()
src/dartlab/cli/commands/collect.py CHANGED
@@ -119,6 +119,20 @@ def configure_parser(subparsers) -> None:
119
  action="store_true",
120
  help="๋ˆ„๋ฝ ๊ณต์‹œ๋งŒ ์ฆ๋ถ„ ์ˆ˜์ง‘ (DART)",
121
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  # EDGAR ์ „์šฉ
123
  parser.add_argument(
124
  "--tier",
@@ -139,6 +153,10 @@ def run(args) -> int:
139
  if source == "edgar":
140
  return _runEdgar(console, args)
141
 
 
 
 
 
142
  # --- DART ---
143
  if getattr(args, "check", False):
144
  return _runCheck(console, args)
@@ -178,12 +196,45 @@ def _printHelp(console) -> None:
178
  console.print(" dartlab collect --batch ์ „์ฒด ์ƒ์žฅ ๋ฐฐ์น˜ ์ˆ˜์ง‘")
179
  console.print(" dartlab collect --stats ์ˆ˜์ง‘ ํ˜„ํ™ฉ")
180
  console.print()
 
 
 
 
 
 
181
  console.print(" [bold]EDGAR[/] (ticker = ์˜๋ฌธ โ†’ ์ž๋™ ๊ฐ์ง€):")
182
  console.print(" dartlab collect AAPL MSFT ์ง€์ • ticker ์ˆ˜์ง‘")
183
  console.print(" dartlab collect --tier sp500 S&P 500 ์ „์ฒด ์ˆ˜์ง‘")
184
  console.print(" dartlab collect --tier sp500 --limit 10 10๊ฐœ๋งŒ ํ…Œ์ŠคํŠธ")
185
 
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  # โ”€โ”€ EDGAR โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
188
 
189
 
 
119
  action="store_true",
120
  help="๋ˆ„๋ฝ ๊ณต์‹œ๋งŒ ์ฆ๋ถ„ ์ˆ˜์ง‘ (DART)",
121
  )
122
+ # scan ํ”„๋ฆฌ๋นŒ๋“œ
123
+ parser.add_argument(
124
+ "--scan",
125
+ nargs="?",
126
+ const="all",
127
+ default=None,
128
+ help="์ „์ข…๋ชฉ scan ํ”„๋ฆฌ๋นŒ๋“œ (all/changes/finance/report)",
129
+ )
130
+ parser.add_argument(
131
+ "--since-year",
132
+ type=int,
133
+ default=2021,
134
+ help="scan ํ”„๋ฆฌ๋นŒ๋“œ ์‹œ์ž‘ ์—ฐ๋„ (๊ธฐ๋ณธ 2021)",
135
+ )
136
  # EDGAR ์ „์šฉ
137
  parser.add_argument(
138
  "--tier",
 
153
  if source == "edgar":
154
  return _runEdgar(console, args)
155
 
156
+ # --- scan ํ”„๋ฆฌ๋นŒ๋“œ ---
157
+ if getattr(args, "scan", None):
158
+ return _runScan(console, args)
159
+
160
  # --- DART ---
161
  if getattr(args, "check", False):
162
  return _runCheck(console, args)
 
196
  console.print(" dartlab collect --batch ์ „์ฒด ์ƒ์žฅ ๋ฐฐ์น˜ ์ˆ˜์ง‘")
197
  console.print(" dartlab collect --stats ์ˆ˜์ง‘ ํ˜„ํ™ฉ")
198
  console.print()
199
+ console.print(" [bold]scan ํ”„๋ฆฌ๋นŒ๋“œ[/]:")
200
+ console.print(" dartlab collect --scan ์ „์ข…๋ชฉ ํšก๋‹จ๋ถ„์„ ํ”„๋ฆฌ๋นŒ๋“œ (changes+finance+report)")
201
+ console.print(" dartlab collect --scan changes changes๋งŒ ํ”„๋ฆฌ๋นŒ๋“œ")
202
+ console.print(" dartlab collect --scan finance finance๋งŒ ํ”„๋ฆฌ๋นŒ๋“œ")
203
+ console.print(" dartlab collect --scan report report๋งŒ ํ”„๋ฆฌ๋นŒ๋“œ")
204
+ console.print()
205
  console.print(" [bold]EDGAR[/] (ticker = ์˜๋ฌธ โ†’ ์ž๋™ ๊ฐ์ง€):")
206
  console.print(" dartlab collect AAPL MSFT ์ง€์ • ticker ์ˆ˜์ง‘")
207
  console.print(" dartlab collect --tier sp500 S&P 500 ์ „์ฒด ์ˆ˜์ง‘")
208
  console.print(" dartlab collect --tier sp500 --limit 10 10๊ฐœ๋งŒ ํ…Œ์ŠคํŠธ")
209
 
210
 
211
+ # โ”€โ”€ scan ํ”„๋ฆฌ๋นŒ๋“œ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
212
+
213
+
214
def _runScan(console, args) -> int:
    """Run the all-stocks scan prebuild selected by ``args.scan``.

    Reads the target (``all``/``changes``/``finance``/``report``) and the
    start year from ``args`` and invokes the matching builder with
    ``verbose=True``.

    Returns:
        ``0`` after a builder has run, ``1`` when the target is unknown.
    """
    from dartlab.market.scan.builder import buildScan, buildChanges, buildFinance, buildReport

    target = getattr(args, "scan", "all")
    sinceYear = getattr(args, "since_year", 2021)

    console.print(f"[bold]scan ํ”„๋ฆฌ๋นŒ๋“œ[/] target={target}, sinceYear={sinceYear}")

    # Target name -> builder entry point.
    builders = {
        "all": buildScan,
        "changes": buildChanges,
        "finance": buildFinance,
        "report": buildReport,
    }
    runBuilder = builders.get(target)
    if runBuilder is None:
        console.print(f"[red]์•Œ ์ˆ˜ ์—†๋Š” scan ํƒ€๊ฒŸ: {target}[/]")
        return 1

    runBuilder(sinceYear=sinceYear, verbose=True)
    return 0
236
+
237
+
238
  # โ”€โ”€ EDGAR โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
239
 
240
 
src/dartlab/cli/parser.py CHANGED
@@ -19,6 +19,7 @@ COMMAND_SPECS = (
19
  CommandSpec("modules", "dartlab.cli.commands.modules"),
20
  # AI / ๋‚ด๋ณด๋‚ด๊ธฐ
21
  CommandSpec("ask", "dartlab.cli.commands.ask"),
 
22
  CommandSpec("report", "dartlab.cli.commands.report"),
23
  CommandSpec("excel", "dartlab.cli.commands.excel"),
24
  # ๋ถ„์„
 
19
  CommandSpec("modules", "dartlab.cli.commands.modules"),
20
  # AI / ๋‚ด๋ณด๋‚ด๊ธฐ
21
  CommandSpec("ask", "dartlab.cli.commands.ask"),
22
+ CommandSpec("chat", "dartlab.cli.commands.chat"),
23
  CommandSpec("report", "dartlab.cli.commands.report"),
24
  CommandSpec("excel", "dartlab.cli.commands.excel"),
25
  # ๋ถ„์„
src/dartlab/core/dataConfig.py CHANGED
@@ -22,6 +22,10 @@ DATA_RELEASES: dict[str, dict] = {
22
  "dir": "dart/report",
23
  "label": "์ •๊ธฐ๋ณด๊ณ ์„œ ๋ฐ์ดํ„ฐ",
24
  },
 
 
 
 
25
  "edgarDocs": {
26
  "dir": "edgar/docs",
27
  "label": "SEC EDGAR ๊ณต์‹œ ๋ฌธ์„œ ๋ฐ์ดํ„ฐ",
 
22
  "dir": "dart/report",
23
  "label": "์ •๊ธฐ๋ณด๊ณ ์„œ ๋ฐ์ดํ„ฐ",
24
  },
25
+ "scan": {
26
+ "dir": "dart/scan",
27
+ "label": "์ „์ข…๋ชฉ ํšก๋‹จ๋ถ„์„ ํ”„๋ฆฌ๋นŒ๋“œ ๋ฐ์ดํ„ฐ",
28
+ },
29
  "edgarDocs": {
30
  "dir": "edgar/docs",
31
  "label": "SEC EDGAR ๊ณต์‹œ ๋ฌธ์„œ ๋ฐ์ดํ„ฐ",
src/dartlab/core/dataLoader.py CHANGED
@@ -300,11 +300,13 @@ def downloadAll(category: str = "docs", *, forceUpdate: bool = False) -> None:
300
  lastErr = None
301
  for attempt in range(_HF_MAX_RETRIES):
302
  try:
 
 
303
  snapshot_download(
304
  repo_id=HF_REPO,
305
  repo_type="dataset",
306
  local_dir=str(localDir),
307
- allow_patterns=f"{hfDir}/*.parquet",
308
  force_download=forceUpdate if attempt == 0 else False,
309
  )
310
  break
@@ -320,7 +322,8 @@ def downloadAll(category: str = "docs", *, forceUpdate: bool = False) -> None:
320
  f"๋งˆ์ง€๋ง‰ ์—๋Ÿฌ: {lastErr}"
321
  )
322
 
323
- count = len(list(dataDir.glob("*.parquet")))
 
324
  emit("download_all:hf_done", label=label, count=count, dataDir=str(dataDir))
325
 
326
 
 
300
  lastErr = None
301
  for attempt in range(_HF_MAX_RETRIES):
302
  try:
303
+ # scan์€ ํ•˜์œ„ ํด๋”(report/)๋„ ํฌํ•จํ•˜๋ฏ€๋กœ ** ํŒจํ„ด ์‚ฌ์šฉ
304
+ pattern = f"{hfDir}/**/*.parquet" if category == "scan" else f"{hfDir}/*.parquet"
305
  snapshot_download(
306
  repo_id=HF_REPO,
307
  repo_type="dataset",
308
  local_dir=str(localDir),
309
+ allow_patterns=pattern,
310
  force_download=forceUpdate if attempt == 0 else False,
311
  )
312
  break
 
322
  f"๋งˆ์ง€๋ง‰ ์—๋Ÿฌ: {lastErr}"
323
  )
324
 
325
+ globPattern = "**/*.parquet" if category == "scan" else "*.parquet"
326
+ count = len(list(dataDir.glob(globPattern)))
327
  emit("download_all:hf_done", label=label, count=count, dataDir=str(dataDir))
328
 
329
 
src/dartlab/market/_helpers.py CHANGED
@@ -10,11 +10,25 @@ import polars as pl
10
  def scan_parquets(api_type: str, keep_cols: list[str]) -> pl.DataFrame:
11
  """report parquet์—์„œ ํŠน์ • apiType๋งŒ LazyFrame ์Šค์บ”.
12
 
13
- keep_cols ์ค‘ ์‹ค์ œ ์กด์žฌํ•˜๋Š” ์ปฌ๋Ÿผ๋งŒ ์„ ํƒํ•˜๋ฉฐ, ํ•ต์‹ฌ ์ปฌ๋Ÿผ(meta ์ œ์™ธ)์ด
14
- ํ•˜๋‚˜๋„ ์—†๋Š” parquet๋Š” ๊ฑด๋„ˆ๋›ด๋‹ค. ํŒŒ์ผ ๊ฐ„ ์Šคํ‚ค๋งˆ๊ฐ€ ๋‹ค๋ฅด๋ฉด null ํŒจ๋”ฉ์œผ๋กœ ํ†ตํ•ฉ.
15
  """
16
  from dartlab.core.dataLoader import _dataDir
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  report_dir = Path(_dataDir("report"))
19
  parquet_files = sorted(report_dir.glob("*.parquet"))
20
 
@@ -121,6 +135,55 @@ def parse_date_year(s) -> int | None:
121
  return None
122
 
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  def scan_finance_parquets(
125
  statement: str,
126
  account_ids: set[str],
@@ -130,16 +193,26 @@ def scan_finance_parquets(
130
  ) -> dict[str, float]:
131
  """finance parquet ์ „์ˆ˜ ์Šค์บ” โ†’ {์ข…๋ชฉ์ฝ”๋“œ: ๊ฐ’}.
132
 
133
- statement: "BS", "IS", "CIS" ๋“ฑ
134
- account_ids/account_nms: ๋งค์นญ ๋Œ€์ƒ
135
  """
136
  from dartlab.core.dataLoader import _dataDir
137
 
 
 
 
 
 
 
 
 
 
 
 
138
  finance_dir = Path(_dataDir("finance"))
139
  parquet_files = sorted(finance_dir.glob("*.parquet"))
140
 
141
  result: dict[str, float] = {}
142
- sj_divs = [statement] if statement != "IS" else ["IS", "CIS"]
143
  for pf in parquet_files:
144
  code = pf.stem
145
  try:
 
10
  def scan_parquets(api_type: str, keep_cols: list[str]) -> pl.DataFrame:
11
  """report parquet์—์„œ ํŠน์ • apiType๋งŒ LazyFrame ์Šค์บ”.
12
 
13
+ scan/report/{apiType}.parquet ํ”„๋ฆฌ๋นŒ๋“œ๊ฐ€ ์žˆ์œผ๋ฉด ๋‹จ์ผ ํŒŒ์ผ์—์„œ ์ฆ‰์‹œ ๋กœ๋“œ.
14
+ ์—†์œผ๋ฉด ์ข…๋ชฉ๋ณ„ parquet ์ˆœํšŒ (fallback).
15
  """
16
  from dartlab.core.dataLoader import _dataDir
17
 
18
+ # 1์ˆœ์œ„: ํ”„๋ฆฌ๋นŒ๋“œ scan parquet
19
+ scan_path = Path(_dataDir("scan")) / "report" / f"{api_type}.parquet"
20
+ if scan_path.exists():
21
+ try:
22
+ lf = pl.scan_parquet(str(scan_path))
23
+ schema_names = lf.collect_schema().names()
24
+ available = [c for c in keep_cols if c in schema_names]
25
+ non_meta = [c for c in available if c not in ("stockCode", "year", "quarter")]
26
+ if non_meta:
27
+ return lf.select(available).collect()
28
+ except (pl.exceptions.PolarsError, OSError):
29
+ pass # fallback to per-file scan
30
+
31
+ # 2์ˆœ์œ„: ์ข…๋ชฉ๋ณ„ ์ˆœํšŒ (fallback)
32
  report_dir = Path(_dataDir("report"))
33
  parquet_files = sorted(report_dir.glob("*.parquet"))
34
 
 
135
  return None
136
 
137
 
138
def _scanFinanceFromMerged(
    scanPath: Path,
    sjDivs: list[str],
    accountIds: set[str],
    accountNms: set[str],
    amountCol: str,
) -> dict[str, float]:
    """Extract one value per stock from the merged finance parquet.

    For every stock the rows are narrowed to the requested statement types,
    then to the preferred statement scope, then to the stock's latest
    ``bsns_year``. The first row matching one of the accounts supplies the
    value.

    Args:
        scanPath: Path of the merged finance parquet.
        sjDivs: Statement codes accepted in the ``sj_div`` column.
        accountIds: Accepted ``account_id`` values.
        accountNms: Accepted ``account_nm`` values.
        amountCol: Column holding the amount, parsed with ``parse_num``.

    Returns:
        Mapping of stock code to parsed amount. Stocks without a parsable
        matching row are omitted. Empty when nothing matches the statement
        filter or the file has no ``account_id`` column.
    """
    lf = pl.scan_parquet(str(scanPath))
    # Read the schema once from the same lazy frame that is filtered below.
    scCol = "stockCode" if "stockCode" in lf.collect_schema().names() else "stock_code"

    target = lf.filter(
        pl.col("sj_div").is_in(sjDivs)
        & (pl.col("fs_nm").str.contains("์—ฐ๊ฒฐ") | pl.col("fs_nm").str.contains("์žฌ๋ฌด์ œํ‘œ"))
    ).collect()

    if target.is_empty() or "account_id" not in target.columns:
        return {}

    # Prefer consolidated statements PER STOCK. Applying the preference to the
    # whole merged frame would drop every stock that only files separate
    # statements as soon as any other stock has consolidated rows.
    target = (
        target.with_columns(pl.col("fs_nm").str.contains("์—ฐ๊ฒฐ").alias("_isCfs"))
        .filter(pl.col("_isCfs") | ~pl.col("_isCfs").any().over(scCol))
        .drop("_isCfs")
    )

    # Keep only each stock's latest year. A window keeps the original row
    # order, so the first-match rule below is deterministic.
    target = target.filter(pl.col("bsns_year") == pl.col("bsns_year").max().over(scCol))

    # Account matching by id or by name.
    matched = target.filter(
        pl.col("account_id").is_in(list(accountIds)) | pl.col("account_nm").is_in(list(accountNms))
    )

    result: dict[str, float] = {}
    for row in matched.iter_rows(named=True):
        code = row.get(scCol, "")
        # First parsable match wins for each stock.
        if code and code not in result:
            val = parse_num(row.get(amountCol))
            if val is not None:
                result[code] = val

    return result
185
+
186
+
187
  def scan_finance_parquets(
188
  statement: str,
189
  account_ids: set[str],
 
193
  ) -> dict[str, float]:
194
  """finance parquet ์ „์ˆ˜ ์Šค์บ” โ†’ {์ข…๋ชฉ์ฝ”๋“œ: ๊ฐ’}.
195
 
196
+ scan/finance.parquet ํ”„๋ฆฌ๋นŒ๋“œ๊ฐ€ ์žˆ์œผ๋ฉด ๋‹จ์ผ ํŒŒ์ผ์—์„œ ์ฆ‰์‹œ ํ•„ํ„ฐ.
197
+ ์—†์œผ๋ฉด ์ข…๋ชฉ๋ณ„ parquet ์ˆœํšŒ (fallback).
198
  """
199
  from dartlab.core.dataLoader import _dataDir
200
 
201
+ sj_divs = [statement] if statement != "IS" else ["IS", "CIS"]
202
+
203
+ # 1์ˆœ์œ„: ํ”„๋ฆฌ๋นŒ๋“œ scan parquet
204
+ scan_path = Path(_dataDir("scan")) / "finance.parquet"
205
+ if scan_path.exists():
206
+ try:
207
+ return _scanFinanceFromMerged(scan_path, sj_divs, account_ids, account_nms, amount_col)
208
+ except (pl.exceptions.PolarsError, OSError):
209
+ pass # fallback
210
+
211
+ # 2์ˆœ์œ„: ์ข…๋ชฉ๋ณ„ ์ˆœํšŒ (fallback)
212
  finance_dir = Path(_dataDir("finance"))
213
  parquet_files = sorted(finance_dir.glob("*.parquet"))
214
 
215
  result: dict[str, float] = {}
 
216
  for pf in parquet_files:
217
  code = pf.stem
218
  try:
src/dartlab/market/scan/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """์ „์ข…๋ชฉ ํšก๋‹จ๋ถ„์„ ํ”„๋ฆฌ๋นŒ๋“œ โ€” changes + finance + report ํ•ฉ์‚ฐ parquet."""
2
+
3
+ from dartlab.market.scan.builder import buildScan, buildChanges, buildFinance, buildReport
4
+
5
+ __all__ = ["buildScan", "buildChanges", "buildFinance", "buildReport"]
src/dartlab/market/scan/builder.py ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์ „์ข…๋ชฉ scan ํ”„๋ฆฌ๋นŒ๋“œ ๋นŒ๋”.
2
+
3
+ docs โ†’ changes, finance โ†’ ํ•ฉ์‚ฐ, report โ†’ apiType๋ณ„ ๋ถ„๋ฆฌ.
4
+ ์‹คํ—˜ 014/015์—์„œ ๊ฒ€์ฆ๋œ ๋กœ์ง์„ ํ”„๋กœ๋•์…˜ํ™”.
5
+ ๋ฐฐ์น˜๋ฅผ ์ค‘๊ฐ„ ํŒŒ์ผ๋กœ ์“ฐ๊ณ  ๋งˆ์ง€๋ง‰์— ํ•ฉ์‚ฐํ•˜์—ฌ segfault ๋ฐฉ์ง€.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import shutil
11
+ import time
12
+ from pathlib import Path
13
+
14
+ import polars as pl
15
+
16
+ # The 10 apiType values actually used by the scanner
17
+ SCAN_API_TYPES = [
18
+ "majorHolder",
19
+ "executive",
20
+ "employee",
21
+ "executivePayAllTotal",
22
+ "executivePayIndividual",
23
+ "auditOpinion",
24
+ "dividend",
25
+ "treasuryStock",
26
+ "capitalChange",
27
+ "corporateBond",
28
+ ]
29
+
30
+ _BATCH = 200
31
+
32
+
33
def _scanDir() -> Path:
    """Return the scan output directory (``_dataDir("scan")`` as a ``Path``)."""
    # Imported at call time rather than at module import.
    from dartlab.core.dataLoader import _dataDir

    return Path(_dataDir("scan"))
38
+
39
+
40
def _docsDir() -> Path:
    """Return the docs data directory (``_dataDir("docs")`` as a ``Path``)."""
    # Imported at call time rather than at module import.
    from dartlab.core.dataLoader import _dataDir

    return Path(_dataDir("docs"))
44
+
45
+
46
def _financeDir() -> Path:
    """Return the finance data directory (``_dataDir("finance")`` as a ``Path``)."""
    # Imported at call time rather than at module import.
    from dartlab.core.dataLoader import _dataDir

    return Path(_dataDir("finance"))
50
+
51
+
52
def _reportDir() -> Path:
    """Return the report data directory (``_dataDir("report")`` as a ``Path``)."""
    # Imported at call time rather than at module import.
    from dartlab.core.dataLoader import _dataDir

    return Path(_dataDir("report"))
56
+
57
+
58
def _log(msg: str) -> None:
    """Write a progress message to standard output via ``print``."""
    print(msg)
60
+
61
+
62
+ def _mergeBatchFiles(batchDir: Path, outputPath: Path, *, how: str = "vertical") -> int:
63
+ """๋ฐฐ์น˜ ํŒŒ์ผ๋“ค์„ ์ฝ์–ด์„œ 1๊ฐœ๋กœ ํ•ฉ์‚ฐ. ๋ฐ˜ํ™˜: ์ด ํ–‰์ˆ˜."""
64
+ batchFiles = sorted(batchDir.glob("batch_*.parquet"))
65
+ if not batchFiles:
66
+ return 0
67
+
68
+ parts = [pl.read_parquet(str(f)) for f in batchFiles]
69
+ merged = pl.concat(parts, how=how)
70
+ merged.write_parquet(str(outputPath), compression="zstd")
71
+ totalRows = merged.height
72
+ del merged, parts
73
+ return totalRows
74
+
75
+
76
+ # โ”€โ”€ changes โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
77
+
78
+
79
def _buildRawChanges(parquetPath: Path, stockCode: str, sinceYear: int = 2021) -> pl.DataFrame | None:
    """Derive section-level changes from one raw docs parquet.

    Each section (keyed by ``section_order`` + ``section_title``) is compared
    with its previous row in ``year`` order and every differing pair is
    classified as ``appeared``, ``disappeared``, ``numeric``, ``structural``
    or ``wording``.

    Args:
        parquetPath: Raw docs parquet of a single stock.
        stockCode: Value written to the ``stockCode`` output column.
        sinceYear: First ``toPeriod`` year kept in the output.

    Returns:
        Frame with ``fromPeriod``, ``toPeriod``, ``sectionTitle``,
        ``changeType``, ``sizeA``, ``sizeB``, ``sizeDelta``, ``preview`` and
        ``stockCode``. ``None`` when the file is unreadable, lacks a needed
        column, has fewer than two rows in range, or no change is found
        before the final year filter.
    """
    try:
        raw = pl.read_parquet(str(parquetPath))
    except (pl.exceptions.PolarsError, OSError):
        return None

    needed = {"year", "section_order", "section_title", "section_content"}
    if not needed.issubset(set(raw.columns)):
        return None

    # Keep one extra year so the first requested year has a predecessor.
    raw = raw.filter(pl.col("year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear - 1)
    if raw.height < 2:
        return None

    work = raw.select(["year", "section_order", "section_title", "section_content"])
    work = work.sort(["section_order", "section_title", "year"])

    # Previous row of the same section, relying on the sort above.
    work = work.with_columns([
        pl.col("year").shift(1).over(["section_order", "section_title"]).alias("_prevYear"),
        pl.col("section_content").shift(1).over(["section_order", "section_title"]).alias("_prevContent"),
    ])

    # Hashes for change detection, character counts and a 200-char preview.
    work = work.with_columns([
        pl.col("section_content").hash().alias("_hash"),
        pl.col("_prevContent").hash().alias("_prevHash"),
        pl.col("section_content").str.len_chars().alias("sizeB"),
        pl.col("_prevContent").str.len_chars().alias("sizeA"),
        pl.col("section_content").str.slice(0, 200).alias("preview"),
    ])

    # Keep rows that have a predecessor, are not null on both sides, and
    # whose content differs or is null on exactly one side.
    changes = work.filter(
        pl.col("_prevYear").is_not_null()
        & ~(pl.col("section_content").is_null() & pl.col("_prevContent").is_null())
        & (
            (pl.col("_hash") != pl.col("_prevHash"))
            | pl.col("section_content").is_null()
            | pl.col("_prevContent").is_null()
        )
    )

    if changes.height == 0:
        return None

    # Replace digit/comma/period runs with "N" so texts that differ only in
    # numbers compare equal.
    numPattern = r"[\d,.]+"
    changes = changes.with_columns([
        pl.col("section_content").str.replace_all(numPattern, "N").alias("_stripped"),
        pl.col("_prevContent").str.replace_all(numPattern, "N").alias("_prevStripped"),
    ])

    # Classification order matters: null checks first, then numeric-only,
    # then a relative size change above 50 %, otherwise wording.
    changes = changes.with_columns(
        pl.when(pl.col("_prevContent").is_null())
        .then(pl.lit("appeared"))
        .when(pl.col("section_content").is_null())
        .then(pl.lit("disappeared"))
        .when(pl.col("_stripped") == pl.col("_prevStripped"))
        .then(pl.lit("numeric"))
        .when(
            (pl.col("sizeA") > 0)
            & ((pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).abs().cast(pl.Float64)
               / pl.col("sizeA").cast(pl.Float64) > 0.5)
        )
        .then(pl.lit("structural"))
        .otherwise(pl.lit("wording"))
        .alias("changeType")
    )

    # Drop rows whose target year is the extra lookback year.
    changes = changes.filter(pl.col("year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear)

    return changes.select([
        pl.col("_prevYear").alias("fromPeriod"),
        pl.col("year").alias("toPeriod"),
        pl.col("section_title").alias("sectionTitle"),
        pl.col("changeType"),
        pl.col("sizeA"),
        pl.col("sizeB"),
        (pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).alias("sizeDelta"),
        pl.col("preview"),
        pl.lit(stockCode).alias("stockCode"),
    ])
159
+
160
+
161
def buildChanges(*, sinceYear: int = 2021, verbose: bool = True) -> Path | None:
    """Prebuild the all-stocks ``changes.parquet`` from the docs parquets.

    Per-stock change frames are collected in chunks of ``_BATCH`` stocks,
    written as intermediate batch files and merged into a single output.

    Args:
        sinceYear: First year kept in the change rows.
        verbose: Emit progress messages through ``_log``.

    Returns:
        Path of the written ``changes.parquet``, or ``None`` when there are no
        docs parquets or no stock produced any change row.
    """
    docsDir = _docsDir()
    outDir = _scanDir()
    outDir.mkdir(parents=True, exist_ok=True)
    outputPath = outDir / "changes.parquet"
    batchDir = outDir / "_tmp_changes"

    allFiles = sorted(docsDir.glob("*.parquet"))
    if not allFiles:
        if verbose:
            _log("docs parquet ์—†์Œ โ€” changes ๋นŒ๋“œ ๊ฑด๋„ˆ๋œ€")
        return None

    # Start from an empty temp dir: batch files left behind by an interrupted
    # run would otherwise be merged into this run's output.
    shutil.rmtree(batchDir, ignore_errors=True)
    batchDir.mkdir(parents=True, exist_ok=True)

    if verbose:
        _log(f"[changes] {len(allFiles)}์ข…๋ชฉ, sinceYear={sinceYear}")

    t0 = time.perf_counter()
    batchChunks: list[pl.DataFrame] = []
    success = 0
    failed = 0  # stocks that yielded no change rows (unreadable or unchanged)
    totalRows = 0
    batchIdx = 0

    try:
        for i, pf in enumerate(allFiles):
            result = _buildRawChanges(pf, pf.stem, sinceYear)
            if result is not None and result.height > 0:
                batchChunks.append(result)
                totalRows += result.height
                success += 1
            else:
                failed += 1

            if len(batchChunks) >= _BATCH:
                batch = pl.concat(batchChunks)
                batch.write_parquet(str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd")
                del batch
                batchChunks = []
                batchIdx += 1

            if verbose and (i + 1) % 500 == 0:
                _log(f" [{i+1}/{len(allFiles)}] {success}ok {failed}fail {totalRows:,}rows {time.perf_counter()-t0:.0f}s")

        # Flush whatever is left after the last file.
        if batchChunks:
            batch = pl.concat(batchChunks)
            batch.write_parquet(str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd")
            del batch
            batchChunks = []
            batchIdx += 1

        if batchIdx == 0:
            if verbose:
                _log(" changes ๊ฒฐ๊ณผ ์—†์Œ")
            return None

        _mergeBatchFiles(batchDir, outputPath)
    finally:
        # Remove the intermediate files on every exit path, errors included.
        shutil.rmtree(batchDir, ignore_errors=True)

    elapsed = time.perf_counter() - t0
    diskMb = outputPath.stat().st_size / 1024 / 1024
    if verbose:
        _log(f" ์™„๋ฃŒ: {success}์ข…๋ชฉ, {totalRows:,}ํ–‰, {diskMb:.1f}MB, {elapsed:.0f}์ดˆ")

    return outputPath
221
+
222
+
223
+ # โ”€โ”€ finance โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
224
+
225
+
226
def buildFinance(*, sinceYear: int = 2021, verbose: bool = True) -> Path | None:
    """Merge every per-stock finance parquet into ``finance.parquet``.

    Each file gets a ``stockCode`` column (renamed from ``stock_code`` or
    filled from the file name), is filtered to ``bsns_year >= sinceYear`` when
    that column exists, and is then concatenated in batches of ``_BATCH``
    files with the ``diagonal_relaxed`` strategy.

    Args:
        sinceYear: First business year kept.
        verbose: Emit progress messages through ``_log``.

    Returns:
        Path of the written ``finance.parquet``, or ``None`` when there are no
        finance parquets or no file contributed any row.
    """
    finDir = _financeDir()
    outDir = _scanDir()
    outDir.mkdir(parents=True, exist_ok=True)
    outputPath = outDir / "finance.parquet"
    batchDir = outDir / "_tmp_finance"

    allFiles = sorted(finDir.glob("*.parquet"))
    if not allFiles:
        if verbose:
            _log("finance parquet ์—†์Œ โ€” ๋นŒ๋“œ ๊ฑด๋„ˆ๋œ€")
        return None

    # Start from an empty temp dir: batch files left behind by an interrupted
    # run would otherwise be merged into this run's output.
    shutil.rmtree(batchDir, ignore_errors=True)
    batchDir.mkdir(parents=True, exist_ok=True)

    if verbose:
        _log(f"[finance] {len(allFiles)}์ข…๋ชฉ, sinceYear={sinceYear}")

    t0 = time.perf_counter()
    batchChunks: list[pl.DataFrame] = []
    success = 0
    totalRows = 0
    batchIdx = 0

    try:
        for i, pf in enumerate(allFiles):
            try:
                df = pl.read_parquet(str(pf))
            except (pl.exceptions.PolarsError, OSError):
                continue

            if "stockCode" not in df.columns and "stock_code" not in df.columns:
                df = df.with_columns(pl.lit(pf.stem).alias("stockCode"))
            elif "stock_code" in df.columns and "stockCode" not in df.columns:
                df = df.rename({"stock_code": "stockCode"})

            if "bsns_year" in df.columns:
                df = df.filter(
                    pl.col("bsns_year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear
                )

            if df.height == 0:
                continue

            batchChunks.append(df)
            totalRows += df.height
            success += 1

            if len(batchChunks) >= _BATCH:
                batch = pl.concat(batchChunks, how="diagonal_relaxed")
                batch.write_parquet(str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd")
                del batch
                batchChunks = []
                batchIdx += 1

            if verbose and (i + 1) % 500 == 0:
                _log(f" [{i+1}/{len(allFiles)}] {success}ok {totalRows:,}rows {time.perf_counter()-t0:.0f}s")

        # The final flush must happen after the loop. Doing it on the last
        # index inside the loop is skipped by the `continue` statements above
        # whenever the last file is unreadable or empty, which would silently
        # drop every chunk still pending.
        if batchChunks:
            batch = pl.concat(batchChunks, how="diagonal_relaxed")
            batch.write_parquet(str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd")
            del batch
            batchChunks = []
            batchIdx += 1

        if batchIdx == 0:
            if verbose:
                _log(" finance ๊ฒฐ๊ณผ ์—†์Œ")
            return None

        _mergeBatchFiles(batchDir, outputPath, how="diagonal_relaxed")
    finally:
        # Remove the intermediate files on every exit path, errors included.
        shutil.rmtree(batchDir, ignore_errors=True)

    elapsed = time.perf_counter() - t0
    diskMb = outputPath.stat().st_size / 1024 / 1024
    if verbose:
        _log(f" ์™„๋ฃŒ: {success}์ข…๋ชฉ, {totalRows:,}ํ–‰, {diskMb:.1f}MB, {elapsed:.0f}์ดˆ")

    return outputPath
299
+
300
+
301
+ # โ”€โ”€ report โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝ๏ฟฝโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
302
+
303
+
304
def buildReport(*, sinceYear: int = 2021, verbose: bool = True) -> list[Path]:
    """Split every per-stock report parquet into one parquet per apiType.

    Only the apiTypes listed in ``SCAN_API_TYPES`` are written. Rows are kept
    when their ``year`` is unparsable or ``>= sinceYear``. Output goes to
    ``<scan dir>/report/<apiType>.parquet``.

    Args:
        sinceYear: First year kept (rows with an unparsable year are kept).
        verbose: Emit progress messages through ``_log``.

    Returns:
        Paths of the apiType parquets written; empty when there are no report
        parquets.
    """
    repDir = _reportDir()
    outDir = _scanDir() / "report"
    outDir.mkdir(parents=True, exist_ok=True)

    allFiles = sorted(repDir.glob("*.parquet"))
    if not allFiles:
        if verbose:
            _log("report parquet ์—†์Œ โ€” ๋นŒ๋“œ ๊ฑด๋„ˆ๋œ€")
        return []

    if verbose:
        _log(f"[report] {len(allFiles)}์ข…๋ชฉ โ†’ apiType๋ณ„ ๋ถ„๋ฆฌ")

    t0 = time.perf_counter()

    # Per-apiType batch directory, next batch index, pending chunks, row count.
    apiBatchDirs: dict[str, Path] = {}
    apiBatchIdx: dict[str, int] = {}
    apiChunks: dict[str, list[pl.DataFrame]] = {}
    apiRows: dict[str, int] = {}
    for at in SCAN_API_TYPES:
        bd = outDir / f"_tmp_{at}"
        # Start from an empty temp dir so batch files left by an interrupted
        # run are never merged into this run's output.
        shutil.rmtree(bd, ignore_errors=True)
        bd.mkdir(parents=True, exist_ok=True)
        apiBatchDirs[at] = bd
        apiBatchIdx[at] = 0
        apiChunks[at] = []
        apiRows[at] = 0

    processed = 0
    outputs: list[Path] = []

    try:
        for i, pf in enumerate(allFiles):
            try:
                df = pl.read_parquet(str(pf))
            except (pl.exceptions.PolarsError, OSError):
                continue

            if "apiType" not in df.columns:
                continue

            # Normalise the code column the same way buildFinance does, so the
            # diagonal concat does not yield two half-null code columns.
            if "stockCode" not in df.columns:
                if "stock_code" in df.columns:
                    df = df.rename({"stock_code": "stockCode"})
                else:
                    df = df.with_columns(pl.lit(pf.stem).alias("stockCode"))

            if "year" in df.columns:
                df = df.with_columns(
                    pl.col("year").cast(pl.Utf8).str.to_integer(strict=False).alias("_yearInt")
                )
                # Rows whose year cannot be parsed are kept.
                df = df.filter(
                    pl.col("_yearInt").is_null() | (pl.col("_yearInt") >= sinceYear)
                ).drop("_yearInt")

            processed += 1

            for apiType in SCAN_API_TYPES:
                sub = df.filter(pl.col("apiType") == apiType)
                if sub.height > 0:
                    apiChunks[apiType].append(sub)
                    apiRows[apiType] += sub.height

                if len(apiChunks[apiType]) >= _BATCH:
                    batch = pl.concat(apiChunks[apiType], how="diagonal_relaxed")
                    idx = apiBatchIdx[apiType]
                    batch.write_parquet(
                        str(apiBatchDirs[apiType] / f"batch_{idx:03d}.parquet"),
                        compression="zstd",
                    )
                    del batch
                    apiChunks[apiType] = []
                    apiBatchIdx[apiType] = idx + 1

            if verbose and (i + 1) % 500 == 0:
                _log(f" [{i+1}/{len(allFiles)}] {processed}ok {time.perf_counter()-t0:.0f}s")

        # Flush the remaining chunks, then merge each apiType.
        for apiType in SCAN_API_TYPES:
            if apiChunks[apiType]:
                batch = pl.concat(apiChunks[apiType], how="diagonal_relaxed")
                idx = apiBatchIdx[apiType]
                batch.write_parquet(
                    str(apiBatchDirs[apiType] / f"batch_{idx:03d}.parquet"),
                    compression="zstd",
                )
                del batch
                apiChunks[apiType] = []
                apiBatchIdx[apiType] = idx + 1

            # No batch was written for this apiType: nothing to merge.
            if apiBatchIdx[apiType] == 0:
                continue

            outPath = outDir / f"{apiType}.parquet"
            _mergeBatchFiles(apiBatchDirs[apiType], outPath, how="diagonal_relaxed")

            diskMb = outPath.stat().st_size / 1024 / 1024
            outputs.append(outPath)
            if verbose:
                _log(f" {apiType}: {apiRows[apiType]:,}ํ–‰, {diskMb:.1f}MB")
    finally:
        # Remove every temp dir on every exit path, errors included.
        for bd in apiBatchDirs.values():
            shutil.rmtree(bd, ignore_errors=True)

    elapsed = time.perf_counter() - t0
    if verbose:
        _log(f" report ์™„๋ฃŒ: {len(outputs)}๊ฐœ apiType, {elapsed:.0f}์ดˆ")

    return outputs
410
+
411
+
412
+ # โ”€โ”€ ์ „์ฒด ๋นŒ๋“œ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
413
+
414
+
415
def buildScan(*, sinceYear: int = 2021, verbose: bool = True) -> dict[str, Path | list[Path] | None]:
    """Run the changes, finance and report prebuilds in that order.

    Args:
        sinceYear: Forwarded to every builder.
        verbose: Forwarded to every builder; also enables the banner and the
            total-size summary printed here.

    Returns:
        Dict with keys ``changes``, ``finance`` and ``report`` holding each
        builder's return value.
    """
    if verbose:
        _log(f"์ „์ข…๋ชฉ scan ํ”„๋ฆฌ๋นŒ๋“œ ์‹œ์ž‘ (sinceYear={sinceYear})")
        _log("=" * 60)

    # Dict literals evaluate in order, so the builders run changes -> finance -> report.
    results: dict[str, Path | list[Path] | None] = {
        "changes": buildChanges(sinceYear=sinceYear, verbose=verbose),
        "finance": buildFinance(sinceYear=sinceYear, verbose=verbose),
        "report": buildReport(sinceYear=sinceYear, verbose=verbose),
    }

    if not verbose:
        return results

    _log("=" * 60)
    scanDir = _scanDir()
    if scanDir.exists():
        # Total size of every parquet below the scan directory.
        totalBytes = 0
        for parquetFile in scanDir.rglob("*.parquet"):
            totalBytes += parquetFile.stat().st_size
        totalMb = totalBytes / 1024 / 1024
        _log(f"scan ์ „์ฒด: {totalMb:.1f}MB")

    return results
src/dartlab/providers/dart/_sections_source.py CHANGED
@@ -6,6 +6,7 @@ raw DataFrame๋ฅผ ๊ฐ์‹ธ๋˜, ๊ฐ™์€ ๊ฒฝ๋กœ์—์„œ freq/semantic ํŒŒ์ƒํ‘œ๋ฅผ ๋ฐ”
6
 
7
  from __future__ import annotations
8
 
 
9
  from typing import TYPE_CHECKING, Any
10
 
11
  import polars as pl
@@ -13,6 +14,9 @@ import polars as pl
13
  if TYPE_CHECKING:
14
  from dartlab.providers.dart.company import Company
15
 
 
 
 
16
 
17
  class _SectionsSource:
18
  """sections source-of-truth accessor.
@@ -176,6 +180,38 @@ class _SectionsSource:
176
  changedOnly=changedOnly,
177
  )
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  def __getattr__(self, name: str) -> Any:
180
  frame = self.raw
181
  if frame is None:
@@ -199,6 +235,120 @@ class _SectionsSource:
199
  return (
200
  "SectionsSource("
201
  "shape="
202
- f"{frame.shape}, methods=[raw, topics(), outline(), periods(), ordered(), coverage(), freq(), semanticRegistry(), semanticCollisions(), structureRegistry(), structureCollisions(), structureEvents(), structureSummary(), structureChanges()]"
203
  ")"
204
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  from __future__ import annotations
8
 
9
+ import re
10
  from typing import TYPE_CHECKING, Any
11
 
12
  import polars as pl
 
14
  if TYPE_CHECKING:
15
  from dartlab.providers.dart.company import Company
16
 
17
+ _PERIOD_RE = re.compile(r"^\d{4}$")
18
+ _NUM_PATTERN = r"[\d,.]+"
19
+
20
 
21
  class _SectionsSource:
22
  """sections source-of-truth accessor.
 
180
  changedOnly=changedOnly,
181
  )
182
 
183
def changes(
    self,
    *,
    topic: str | None = None,
    fromPeriod: str | None = None,
    toPeriod: str | None = None,
) -> pl.DataFrame | None:
    """Extract the blocks that changed between adjacent periods (vectorised).

    Delegates to ``_buildChanges`` on the raw sections frame. The result
    labels each change as appeared, disappeared, numeric, structural or
    wording.

    Returns:
        The change frame, or ``None`` when no raw frame is available.
    """
    sectionsFrame = self.raw
    return (
        None
        if sectionsFrame is None
        else _buildChanges(sectionsFrame, topic=topic, fromPeriod=fromPeriod, toPeriod=toPeriod)
    )
199
+
200
def changeSummary(self, *, topN: int = 10) -> pl.DataFrame | None:
    """Summarise changes per topic and change type, for use as AI context.

    Groups ``self.changes()`` by ``topic`` and ``changeType``, counts the rows
    and averages ``sizeDelta`` (rounded, as Int64). Rows are ordered by topic
    ascending then count descending, and the first ``topN * 5`` rows are
    returned.

    Returns:
        The summary frame, or ``None`` when there are no changes.
    """
    changeRows = self.changes()
    if changeRows is None or changeRows.is_empty():
        return None

    rowCount = pl.len().alias("count")
    meanDelta = pl.col("sizeDelta").mean().round(0).cast(pl.Int64).alias("avgDelta")

    grouped = changeRows.group_by(["topic", "changeType"]).agg(rowCount, meanDelta)
    ordered = grouped.sort(["topic", "count"], descending=[False, True])
    return ordered.head(topN * 5)
214
+
215
  def __getattr__(self, name: str) -> Any:
216
  frame = self.raw
217
  if frame is None:
 
235
  return (
236
  "SectionsSource("
237
  "shape="
238
+ f"{frame.shape}, methods=[raw, topics(), outline(), periods(), ordered(), coverage(), freq(), changes(), changeSummary(), semanticRegistry(), semanticCollisions(), structureRegistry(), structureCollisions(), structureEvents(), structureSummary(), structureChanges()]"
239
  ")"
240
  )
241
+
242
+
243
def _buildChanges(
    sections: pl.DataFrame,
    *,
    topic: str | None = None,
    fromPeriod: str | None = None,
    toPeriod: str | None = None,
) -> pl.DataFrame:
    """Turn the wide sections frame into a frame of changed blocks (vectorised).

    Columns whose name is exactly four digits are treated as annual periods.
    The frame is unpivoted to long form, each row is compared with its
    previous period, and every differing pair is classified.

    The original docstring reports this pattern as validated in experiment
    101-010 (22,060 rows in 0.15 s, 12x faster than a Python loop); that
    figure is not verifiable from this file.

    Args:
        sections: Wide frame with a ``topic`` column and one column per period.
        topic: When given, only rows of this topic are considered.
        fromPeriod: When given, keep changes whose source period is ``>=`` it.
        toPeriod: When given, keep changes whose target period is ``<=`` it.

    Returns:
        Frame with ``fromPeriod``, ``toPeriod``, ``changeType``, ``sizeA``,
        ``sizeB``, ``preview``, the meta columns present and ``sizeDelta``.
        A column-less empty frame when fewer than two period columns exist,
        the topic has no rows, or nothing changed.
    """
    annualCols = sorted(c for c in sections.columns if _PERIOD_RE.match(c))
    if len(annualCols) < 2:
        return pl.DataFrame()

    # "topic" is always carried; the other meta columns only when present.
    metaCols = ["topic"]
    for col in ("textPathKey", "blockType", "blockOrder"):
        if col in sections.columns:
            metaCols.append(col)

    if topic is not None:
        sections = sections.filter(pl.col("topic") == topic)
        if sections.is_empty():
            return pl.DataFrame()

    # The row index identifies one source row across its periods.
    work = sections.with_row_index("_row")

    # wide -> long
    long = work.select(["_row"] + metaCols + annualCols).unpivot(
        index=["_row"] + metaCols,
        on=annualCols,
        variable_name="period",
        value_name="text",
    )
    long = long.with_columns(pl.col("text").cast(pl.Utf8))

    # hash + length + preview, keeping null where the text is null
    long = long.with_columns(
        pl.when(pl.col("text").is_not_null())
        .then(pl.col("text").hash())
        .otherwise(pl.lit(None, dtype=pl.UInt64))
        .alias("_hash"),
        pl.when(pl.col("text").is_not_null())
        .then(pl.col("text").str.len_chars())
        .otherwise(pl.lit(None, dtype=pl.UInt32))
        .alias("_len"),
        pl.when(pl.col("text").is_not_null())
        .then(pl.col("text").str.slice(0, 200))
        .otherwise(pl.lit(None, dtype=pl.Utf8))
        .alias("preview"),
    )

    # Adjacent-period comparison: shift within each source row after sorting.
    long = long.sort(["_row", "period"])
    long = long.with_columns(
        pl.col("period").shift(1).over("_row").alias("_prevPeriod"),
        pl.col("_hash").shift(1).over("_row").alias("_prevHash"),
        pl.col("_len").shift(1).over("_row").alias("_prevLen"),
        pl.col("text").shift(1).over("_row").alias("_prevText"),
    )

    # Change filter: has a predecessor, not null on both sides, and differs
    # (or is null on exactly one side).
    changes = long.filter(
        pl.col("_prevPeriod").is_not_null()
        & ~(pl.col("text").is_null() & pl.col("_prevText").is_null())
        & ((pl.col("_hash") != pl.col("_prevHash")) | pl.col("text").is_null() | pl.col("_prevText").is_null())
    )

    if changes.is_empty():
        return pl.DataFrame()

    # Period filter (string comparison on the period labels).
    if fromPeriod is not None:
        changes = changes.filter(pl.col("_prevPeriod") >= fromPeriod)
    if toPeriod is not None:
        changes = changes.filter(pl.col("period") <= toPeriod)

    # Change-type classification: replace number-like runs with "N" so texts
    # that differ only in numbers compare equal.
    changes = changes.with_columns(
        pl.col("text").str.replace_all(_NUM_PATTERN, "N").alias("_stripped"),
        pl.col("_prevText").str.replace_all(_NUM_PATTERN, "N").alias("_prevStripped"),
    )

    # Order matters: null checks first, then numeric-only, then a relative
    # length change above 50 %, otherwise wording.
    changes = changes.with_columns(
        pl.when(pl.col("_prevText").is_null())
        .then(pl.lit("appeared"))
        .when(pl.col("text").is_null())
        .then(pl.lit("disappeared"))
        .when(pl.col("_stripped") == pl.col("_prevStripped"))
        .then(pl.lit("numeric"))
        .when(
            (pl.col("_prevLen") > 0)
            & (
                (pl.col("_len").cast(pl.Int64) - pl.col("_prevLen").cast(pl.Int64)).abs().cast(pl.Float64)
                / pl.col("_prevLen").cast(pl.Float64)
                > 0.5
            )
        )
        .then(pl.lit("structural"))
        .otherwise(pl.lit("wording"))
        .alias("changeType")
    )

    # Final column selection and public names.
    resultCols = ["_prevPeriod", "period", "changeType", "_prevLen", "_len", "preview"] + metaCols
    renameMap = {"_prevPeriod": "fromPeriod", "period": "toPeriod", "_prevLen": "sizeA", "_len": "sizeB"}

    result = changes.select(resultCols).rename(renameMap)
    result = result.with_columns((pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).alias("sizeDelta"))

    return result
src/dartlab/review/builders.py CHANGED
@@ -396,21 +396,18 @@ def fundingSourcesBlock(data: dict) -> list:
396
  )
397
  blocks.append(MetricBlock(metrics))
398
 
399
- # ์‹œ๊ณ„์—ด ํ…Œ์ด๋ธ”
400
  history = data.get("history", [])
401
  if len(history) >= 2:
402
- histRows = []
403
  for h in history:
404
- histRows.append(
405
- {
406
- "๊ธฐ๊ฐ„": h["period"],
407
- "๋‚ด๋ถ€์œ ๋ณด": f"{h['retainedPct']:.0f}%",
408
- "์ฃผ์ฃผ์ž๋ณธ": f"{h['paidInPct']:.0f}%",
409
- "๊ธˆ์œต์ฐจ์ž…": f"{h['finDebtPct']:.0f}%",
410
- "์˜์—…์กฐ๋‹ฌ": f"{h['opFundingPct']:.0f}%",
411
- }
412
- )
413
- blocks.append(TableBlock("์กฐ๋‹ฌ์› ๋น„์ค‘ ์ถ”์ด", pl.DataFrame(histRows)))
414
 
415
  # ๋ณด์ถฉ ์ง€ํ‘œ (์ˆœ์ฐจ์ž…๊ธˆ/EBITDA, ์•”๋ฌต์  ์ฐจ์ž…๊ธˆ๋ฆฌ)
416
  suppMetrics = []
 
396
  )
397
  blocks.append(MetricBlock(metrics))
398
 
399
+ # ์‹œ๊ณ„์—ด ํ…Œ์ด๋ธ” (ํ–‰=ํ•ญ๋ชฉ, ์—ด=๊ธฐ๊ฐ„)
400
  history = data.get("history", [])
401
  if len(history) >= 2:
402
+ cols = {"": ["๋‚ด๋ถ€์œ ๋ณด", "์ฃผ์ฃผ์ž๋ณธ", "๊ธˆ์œต์ฐจ์ž…", "์˜์—…์กฐ๋‹ฌ"]}
403
  for h in history:
404
+ cols[h["period"]] = [
405
+ f"{h['retainedPct']:.0f}%",
406
+ f"{h['paidInPct']:.0f}%",
407
+ f"{h['finDebtPct']:.0f}%",
408
+ f"{h['opFundingPct']:.0f}%",
409
+ ]
410
+ blocks.append(TableBlock("์กฐ๋‹ฌ์› ๋น„์ค‘ ์ถ”์ด", pl.DataFrame(cols)))
 
 
 
411
 
412
  # ๋ณด์ถฉ ์ง€ํ‘œ (์ˆœ์ฐจ์ž…๊ธˆ/EBITDA, ์•”๋ฌต์  ์ฐจ์ž…๊ธˆ๋ฆฌ)
413
  suppMetrics = []