Spaces:
Sleeping
Sleeping
github-actions[bot] committed on
Commit ·
6254e2b
1
Parent(s): bc7389c
sync from 4c3ef19
Browse files- Dockerfile +22 -2
- README_PROJECT.md +0 -1108
- pyproject.toml +1 -0
- src/dartlab/ai/DEV.md +72 -0
- src/dartlab/ai/context/builder.py +66 -4
- src/dartlab/ai/context/pruning.py +95 -0
- src/dartlab/ai/conversation/prompts.py +34 -7
- src/dartlab/ai/conversation/templates/analysisPhilosophy.py +57 -0
- src/dartlab/ai/eval/batchResults/batch_ollama_20260327_124945.jsonl +35 -0
- src/dartlab/ai/eval/batchResults/batch_ollama_20260327_131602.jsonl +4 -0
- src/dartlab/ai/eval/batchResults/batch_ollama_20260327_132810.jsonl +11 -0
- src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_124945.md +21 -0
- src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_131602.md +15 -0
- src/dartlab/ai/memory/__init__.py +8 -0
- src/dartlab/ai/memory/store.py +154 -0
- src/dartlab/ai/memory/summarizer.py +55 -0
- src/dartlab/ai/providers/oauth_codex.py +57 -11
- src/dartlab/ai/runtime/agent.py +124 -7
- src/dartlab/ai/runtime/core.py +40 -2
- src/dartlab/ai/runtime/run_modes.py +87 -2
- src/dartlab/ai/runtime/scratchpad.py +115 -0
- src/dartlab/ai/skills/__init__.py +9 -0
- src/dartlab/ai/skills/catalog.py +145 -0
- src/dartlab/ai/skills/registry.py +58 -0
- src/dartlab/ai/tools/defaults/helpers.py +5 -2
- src/dartlab/cli/commands/chat.py +472 -0
- src/dartlab/cli/commands/collect.py +51 -0
- src/dartlab/cli/parser.py +1 -0
- src/dartlab/core/dataConfig.py +4 -0
- src/dartlab/core/dataLoader.py +5 -2
- src/dartlab/market/_helpers.py +78 -5
- src/dartlab/market/scan/__init__.py +5 -0
- src/dartlab/market/scan/builder.py +436 -0
- src/dartlab/providers/dart/_sections_source.py +151 -1
- src/dartlab/review/builders.py +9 -12
Dockerfile
CHANGED
|
@@ -4,14 +4,34 @@ WORKDIR /app
|
|
| 4 |
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
build-essential \
|
|
|
|
|
|
|
| 7 |
&& rm -rf /var/lib/apt/lists/*
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
COPY src/ src/
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
| 13 |
|
| 14 |
ENV SPACE_ID=1
|
|
|
|
| 15 |
|
| 16 |
EXPOSE 7860
|
| 17 |
|
|
|
|
| 4 |
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
build-essential \
|
| 7 |
+
libxml2-dev \
|
| 8 |
+
libxslt1-dev \
|
| 9 |
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
+
# 핵심 의존성만 먼저 설치 (wheel 우선, 빌드 실패 방지)
|
| 12 |
+
RUN pip install --no-cache-dir \
|
| 13 |
+
polars \
|
| 14 |
+
beautifulsoup4 lxml \
|
| 15 |
+
httpx requests orjson \
|
| 16 |
+
openpyxl rich plotly \
|
| 17 |
+
prompt-toolkit \
|
| 18 |
+
alive-progress \
|
| 19 |
+
diff-match-patch \
|
| 20 |
+
fastapi uvicorn[standard] sse-starlette msgpack
|
| 21 |
+
|
| 22 |
+
COPY pyproject.toml ./
|
| 23 |
COPY src/ src/
|
| 24 |
+
RUN touch README.md
|
| 25 |
+
|
| 26 |
+
# --no-deps: 위에서 이미 설치한 의존성 재설치 방지, marimo/mcp 건너뜀
|
| 27 |
+
RUN pip install --no-cache-dir --no-deps -e .
|
| 28 |
|
| 29 |
+
# HF Spaces user
|
| 30 |
+
RUN useradd -m -u 1000 user
|
| 31 |
+
USER user
|
| 32 |
|
| 33 |
ENV SPACE_ID=1
|
| 34 |
+
ENV HOME=/home/user
|
| 35 |
|
| 36 |
EXPOSE 7860
|
| 37 |
|
README_PROJECT.md
DELETED
|
@@ -1,1108 +0,0 @@
|
|
| 1 |
-
<div align="center">
|
| 2 |
-
|
| 3 |
-
<br>
|
| 4 |
-
|
| 5 |
-
<img alt="DartLab" src=".github/assets/logo.png" width="180">
|
| 6 |
-
|
| 7 |
-
<h3>DartLab</h3>
|
| 8 |
-
|
| 9 |
-
<p><b>One stock code. The whole story.</b></p>
|
| 10 |
-
<p>DART + EDGAR filings, structured and comparable — in one line of Python.</p>
|
| 11 |
-
|
| 12 |
-
<p>
|
| 13 |
-
<a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/v/dartlab?style=for-the-badge&color=ea4647&labelColor=050811&logo=pypi&logoColor=white" alt="PyPI"></a>
|
| 14 |
-
<a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/pyversions/dartlab?style=for-the-badge&color=c83232&labelColor=050811&logo=python&logoColor=white" alt="Python"></a>
|
| 15 |
-
<a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-94a3b8?style=for-the-badge&labelColor=050811" alt="License"></a>
|
| 16 |
-
<a href="https://github.com/eddmpython/dartlab/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/eddmpython/dartlab/ci.yml?branch=master&style=for-the-badge&labelColor=050811&logo=github&logoColor=white&label=CI" alt="CI"></a>
|
| 17 |
-
<a href="https://eddmpython.github.io/dartlab/"><img src="https://img.shields.io/badge/Docs-GitHub_Pages-38bdf8?style=for-the-badge&labelColor=050811&logo=github-pages&logoColor=white" alt="Docs"></a>
|
| 18 |
-
<a href="https://eddmpython.github.io/dartlab/blog/"><img src="https://img.shields.io/badge/Blog-120%2B_Articles-fbbf24?style=for-the-badge&labelColor=050811&logo=rss&logoColor=white" alt="Blog"></a>
|
| 19 |
-
</p>
|
| 20 |
-
|
| 21 |
-
<p>
|
| 22 |
-
<a href="https://eddmpython.github.io/dartlab/">Docs</a> · <a href="https://eddmpython.github.io/dartlab/blog/">Blog</a> · <a href="https://huggingface.co/spaces/eddmpython/dartlab">Live Demo</a> · <a href="notebooks/marimo/">Marimo Notebooks</a> · <a href="https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb">Open in Colab</a> · <a href="README_KR.md">한국어</a> · <a href="https://buymeacoffee.com/eddmpython">Sponsor</a>
|
| 23 |
-
</p>
|
| 24 |
-
|
| 25 |
-
<p>
|
| 26 |
-
<a href="https://huggingface.co/datasets/eddmpython/dartlab-data"><img src="https://img.shields.io/badge/Data-HuggingFace-ffd21e?style=for-the-badge&labelColor=050811&logo=huggingface&logoColor=white" alt="HuggingFace Data"></a>
|
| 27 |
-
</p>
|
| 28 |
-
|
| 29 |
-
</div>
|
| 30 |
-
|
| 31 |
-
> **Note:** DartLab is under active development. APIs may change between versions, and documentation may lag behind the latest code.
|
| 32 |
-
|
| 33 |
-
## Install
|
| 34 |
-
|
| 35 |
-
Requires **Python 3.12+**.
|
| 36 |
-
|
| 37 |
-
```bash
|
| 38 |
-
# Core — financial statements, sections, Company
|
| 39 |
-
uv add dartlab
|
| 40 |
-
|
| 41 |
-
# or with pip
|
| 42 |
-
pip install dartlab
|
| 43 |
-
```
|
| 44 |
-
|
| 45 |
-
### Optional Extras
|
| 46 |
-
|
| 47 |
-
Install only what you need:
|
| 48 |
-
|
| 49 |
-
```bash
|
| 50 |
-
uv add "dartlab[ai]" # web UI, server, streaming (FastAPI + uvicorn)
|
| 51 |
-
uv add "dartlab[llm]" # LLM analysis (OpenAI)
|
| 52 |
-
uv add "dartlab[charts]" # Plotly charts, network graphs (plotly + networkx + scipy)
|
| 53 |
-
uv add "dartlab[mcp]" # MCP server for Claude Desktop / Code / Cursor
|
| 54 |
-
uv add "dartlab[channel]" # web UI + cloudflared tunnel sharing
|
| 55 |
-
uv add "dartlab[channel-ngrok]" # web UI + ngrok tunnel sharing
|
| 56 |
-
uv add "dartlab[channel-full]" # all channels + Telegram / Slack / Discord bots
|
| 57 |
-
uv add "dartlab[all]" # everything above (except channel bots)
|
| 58 |
-
```
|
| 59 |
-
|
| 60 |
-
**Common combinations:**
|
| 61 |
-
|
| 62 |
-
```bash
|
| 63 |
-
# financial analysis + AI chat
|
| 64 |
-
uv add "dartlab[ai,llm]"
|
| 65 |
-
|
| 66 |
-
# full analysis suite โ charts, AI, LLM
|
| 67 |
-
uv add "dartlab[ai,llm,charts]"
|
| 68 |
-
|
| 69 |
-
# share analysis with team via tunnel
|
| 70 |
-
uv add "dartlab[channel]"
|
| 71 |
-
```
|
| 72 |
-
|
| 73 |
-
### From Source
|
| 74 |
-
|
| 75 |
-
```bash
|
| 76 |
-
git clone https://github.com/eddmpython/dartlab.git
|
| 77 |
-
cd dartlab && uv pip install -e ".[all]"
|
| 78 |
-
|
| 79 |
-
# or with pip
|
| 80 |
-
pip install -e ".[all]"
|
| 81 |
-
```
|
| 82 |
-
|
| 83 |
-
PyPI releases are published only when the core is stable. If you want the latest features (including experimental ones like audit, forecast, valuation), clone the repo directly — but expect occasional breaking changes.
|
| 84 |
-
|
| 85 |
-
### Desktop App (Alpha)
|
| 86 |
-
|
| 87 |
-
Skip all installation steps โ download the standalone Windows launcher:
|
| 88 |
-
|
| 89 |
-
- **[Download DartLab.exe](https://github.com/eddmpython/dartlab-desktop/releases/latest/download/DartLab.exe)** from [dartlab-desktop](https://github.com/eddmpython/dartlab-desktop)
|
| 90 |
-
- Also available from the [DartLab landing page](https://eddmpython.github.io/dartlab/)
|
| 91 |
-
|
| 92 |
-
One-click launch — no Python, no terminal, no package manager required. The desktop app bundles the web UI with a built-in Python runtime.
|
| 93 |
-
|
| 94 |
-
> **Alpha** — functional but incomplete. The desktop app is a Windows-only `.exe` launcher. macOS/Linux are not yet supported.
|
| 95 |
-
|
| 96 |
-
---
|
| 97 |
-
|
| 98 |
-
**No data setup required.** When you create a `Company`, dartlab automatically downloads the required data from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data) (DART) or SEC API (EDGAR). The second run loads instantly from local cache.
|
| 99 |
-
|
| 100 |
-
## Quick Start
|
| 101 |
-
|
| 102 |
-
Pick any company. Get the whole picture.
|
| 103 |
-
|
| 104 |
-
```python
|
| 105 |
-
import dartlab
|
| 106 |
-
|
| 107 |
-
# Samsung Electronics โ from raw filings to structured data
|
| 108 |
-
c = dartlab.Company("005930")
|
| 109 |
-
c.sections # every topic, every period, side by side
|
| 110 |
-
c.show("businessOverview") # what this company actually does
|
| 111 |
-
c.diff("businessOverview") # what changed since last year
|
| 112 |
-
c.BS # standardized balance sheet
|
| 113 |
-
c.ratios # 47 financial ratios, already calculated
|
| 114 |
-
|
| 115 |
-
# Apple โ same interface, different country
|
| 116 |
-
us = dartlab.Company("AAPL")
|
| 117 |
-
us.show("business")
|
| 118 |
-
us.ratios
|
| 119 |
-
|
| 120 |
-
# No code needed โ ask in natural language
|
| 121 |
-
dartlab.ask("Analyze Samsung Electronics financial health")
|
| 122 |
-
```
|
| 123 |
-
|
| 124 |
-
## What DartLab Is
|
| 125 |
-
|
| 126 |
-
A public company files hundreds of pages every quarter. Inside those pages is everything — revenue trends, risk warnings, management strategy, competitive position. The complete truth about a company, written by the company itself.
|
| 127 |
-
|
| 128 |
-
Nobody reads it.
|
| 129 |
-
|
| 130 |
-
Not because they don't want to. Because the same information is named differently by every company, structured differently every year, and scattered across formats designed for regulators, not readers. The same "revenue" appears as `ifrs-full_Revenue`, `dart_Revenue`, `SalesRevenue`, or dozens of Korean variations.
|
| 131 |
-
|
| 132 |
-
DartLab changes who can access this information. Two engines turn raw filings into one comparable map:
|
| 133 |
-
|
| 134 |
-
### The Two Problems DartLab Solves
|
| 135 |
-
|
| 136 |
-
**1. The same company says different things differently every year.**
|
| 137 |
-
|
| 138 |
-
Sections horizontalization normalizes every disclosure section into a **topic × period** grid. Different titles across years and industries all resolve to the same canonical topic:
|
| 139 |
-
|
| 140 |
-
```
|
| 141 |
-
2025Q4 2024Q4 2024Q3 2023Q4 โฆ
|
| 142 |
-
companyOverview โ โ โ โ
|
| 143 |
-
businessOverview โ โ โ โ
|
| 144 |
-
productService โ โ โ โ
|
| 145 |
-
salesOrder โ โ โ โ
|
| 146 |
-
employee โ โ โ โ
|
| 147 |
-
dividend โ โ โ โ
|
| 148 |
-
audit โ โ โ โ
|
| 149 |
-
โฆ (98 canonical topics)
|
| 150 |
-
```
|
| 151 |
-
|
| 152 |
-
```
|
| 153 |
-
Before (raw section titles): After (canonical topic):
|
| 154 |
-
Samsung "II. 사업의 내용" → businessOverview
|
| 155 |
-
Hyundai "II. 사업의 내용 [자동차부문]" → businessOverview
|
| 156 |
-
Kakao "2. 사업의 내용" → businessOverview
|
| 157 |
-
```
|
| 158 |
-
|
| 159 |
-
The mapping pipeline: **text normalization** → **545 hardcoded title mappings** → **73 regex patterns** → canonical topic. ~95%+ mapping rate across all listed companies. Each cell keeps the full text with heading/body separation, tables, and original evidence. Comparing "what did the company say about risk last year vs. this year" becomes a single `diff()` call.
|
| 160 |
-
|
| 161 |
-
**2. Every company names the same number differently.**
|
| 162 |
-
|
| 163 |
-
Account standardization normalizes every XBRL account through a 4-step pipeline:
|
| 164 |
-
|
| 165 |
-
```
|
| 166 |
-
Raw XBRL account_id
|
| 167 |
-
โ Strip prefixes (ifrs-full_, dart_, ifrs_, ifrs-smes_)
|
| 168 |
-
โ English ID synonyms (59 rules)
|
| 169 |
-
โ Korean name synonyms (104 rules)
|
| 170 |
-
โ Learned mapping table (34,249 entries)
|
| 171 |
-
โ Result: revenue, operatingIncome, totalAssets, โฆ
|
| 172 |
-
```
|
| 173 |
-
|
| 174 |
-
```
|
| 175 |
-
Before (raw XBRL): After (standardized):
|
| 176 |
-
Company account_id account_nm โ snakeId label
|
| 177 |
-
Samsung ifrs-full_Revenue 수익(매출액) → revenue 매출액
|
| 178 |
-
SK Hynix dart_Revenue 매출액 → revenue 매출액
|
| 179 |
-
LG Energy Revenue 매출 → revenue 매출액
|
| 180 |
-
```
|
| 181 |
-
|
| 182 |
-
~97% mapping rate. Cross-company comparison requires zero manual work. Combined with `scanAccount` / `scanRatio`, you can compare a single metric across **2,700+ companies** in one call.
|
| 183 |
-
|
| 184 |
-
### Principles — Accessibility and Reliability
|
| 185 |
-
|
| 186 |
-
These two principles govern every public API:
|
| 187 |
-
|
| 188 |
-
**Accessibility** — One stock code is all you need. `import dartlab` provides access to every feature. No internal DTOs, no extra imports, no data setup. `Company("005930")` auto-downloads from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data).
|
| 189 |
-
|
| 190 |
-
**Reliability** — Numbers are raw originals from DART/EDGAR. Missing data returns `None`, never a guess. `trace(topic)` shows which source was chosen and why. Errors are never swallowed.
|
| 191 |
-
|
| 192 |
-
### Company — The Merged Map
|
| 193 |
-
|
| 194 |
-
`Company` uses `sections` as the spine, then overlays stronger data sources:
|
| 195 |
-
|
| 196 |
-
```
|
| 197 |
-
Layer What it provides Priority
|
| 198 |
-
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 199 |
-
docs Section text, tables, evidence Base spine
|
| 200 |
-
finance BS, IS, CF, ratios, time series Replaces numeric topics
|
| 201 |
-
report 28 structured APIs (DART only) Fills structured topics
|
| 202 |
-
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 203 |
-
profile Merged view (default for users) Highest
|
| 204 |
-
```
|
| 205 |
-
|
| 206 |
-
```python
|
| 207 |
-
c.docs.sections # pure text source (sections spine)
|
| 208 |
-
c.finance.BS # authoritative financial statements
|
| 209 |
-
c.report.extract() # structured DART API data
|
| 210 |
-
c.profile.sections # merged view โ what users see by default
|
| 211 |
-
```
|
| 212 |
-
|
| 213 |
-
`c.sections` is the merged view. `c.trace("BS")` tells you which source was chosen and why.
|
| 214 |
-
|
| 215 |
-
### Architecture โ Layered by Responsibility
|
| 216 |
-
|
| 217 |
-
DartLab follows a strict layered architecture where each layer only depends on layers below it:
|
| 218 |
-
|
| 219 |
-
```
|
| 220 |
-
L0 core/ Protocols, finance utils, docs utils, registry
|
| 221 |
-
L1 providers/ Country-specific data (DART, EDGAR, EDINET)
|
| 222 |
-
gather/ External market data (Naver, Yahoo, FRED)
|
| 223 |
-
market/ Market-wide scanning (2,700+ companies)
|
| 224 |
-
L2 analysis/ Analytical engines (valuation, risk, insights, event study)
|
| 225 |
-
L3 ai/ LLM-powered analysis (9 providers)
|
| 226 |
-
```
|
| 227 |
-
|
| 228 |
-
Import direction is enforced by CI โ no reverse dependencies allowed.
|
| 229 |
-
|
| 230 |
-
### Extensibility โ Zero Core Modification
|
| 231 |
-
|
| 232 |
-
Adding a new country requires zero changes to core code:
|
| 233 |
-
|
| 234 |
-
1. Create a provider package under `providers/`
|
| 235 |
-
2. Implement `canHandle(code) -> bool` and `priority() -> int`
|
| 236 |
-
3. Register via `entry_points` in `pyproject.toml`
|
| 237 |
-
|
| 238 |
-
```python
|
| 239 |
-
dartlab.Company("005930") # โ DART provider (priority 10)
|
| 240 |
-
dartlab.Company("AAPL") # โ EDGAR provider (priority 20)
|
| 241 |
-
```
|
| 242 |
-
|
| 243 |
-
The facade iterates providers by priority — first match wins. This follows the same pattern as OpenBB's provider system and scikit-learn's estimator registration.
|
| 244 |
-
|
| 245 |
-
## Core Features
|
| 246 |
-
|
| 247 |
-
### Show, Trace, Diff
|
| 248 |
-
|
| 249 |
-
```python
|
| 250 |
-
c = dartlab.Company("005930")
|
| 251 |
-
|
| 252 |
-
# show โ open any topic with source-aware priority
|
| 253 |
-
c.show("BS") # โ finance DataFrame
|
| 254 |
-
c.show("overview") # โ sections-based text + tables
|
| 255 |
-
c.show("dividend") # โ report DataFrame (all quarters)
|
| 256 |
-
c.show("IS", period=["2024Q4", "2023Q4"]) # compare specific periods
|
| 257 |
-
|
| 258 |
-
# trace โ why a topic came from docs, finance, or report
|
| 259 |
-
c.trace("BS") # โ {"primarySource": "finance", ...}
|
| 260 |
-
|
| 261 |
-
# diff โ text change detection (3 modes)
|
| 262 |
-
c.diff() # full summary
|
| 263 |
-
c.diff("businessOverview") # topic history
|
| 264 |
-
c.diff("businessOverview", "2024", "2025") # line-by-line diff
|
| 265 |
-
```
|
| 266 |
-
|
| 267 |
-
What the output looks like:
|
| 268 |
-
|
| 269 |
-
```
|
| 270 |
-
>>> c.show("businessOverview")
|
| 271 |
-
shape: (12, 5)
|
| 272 |
-
โโโโโโโโโโโโโฌโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 273 |
-
โ blockType โ nodeType โ 2024 โ 2023 โ
|
| 274 |
-
โโโโโโโโโโโโโผโโโโโโโโโโโผโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโผโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโค
|
| 275 |
-
โ text โ heading โ 1. ์ฐ์
์ ํน์ฑ โ 1. ์ฐ์
์ ํน์ฑ โ
|
| 276 |
-
โ text โ body โ ๋ฐ๋์ฒด ์ฐ์
์ ๊ธฐ์ ์ง์ฝ์ โฆ โ ๋ฐ๋์ฒด ์ฐ์
์ ๊ธฐ์ ์ง์ฝ์ โฆ โ
|
| 277 |
-
โ table โ null โ DataFrame(5ร3) โ DataFrame(5ร3) โ
|
| 278 |
-
โโโโโโโโโโโโโดโโโโโโโโโโโดโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโดโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 279 |
-
|
| 280 |
-
>>> c.diff("businessOverview", "2023", "2024")
|
| 281 |
-
โโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 282 |
-
โ status โ text โ
|
| 283 |
-
โโโโโโโโโโโโผโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโค
|
| 284 |
-
โ added โ AI ๋ฐ๋์ฒด ์์ ๊ธ์ฆ์ ๋ฐ๋ฅธ HBM ๋งค์ถ ํ๋ โฆ โ
|
| 285 |
-
โ modified โ ๋งค์ถ์ก 258.9์กฐ์ โ 300.9์กฐ์ โ
|
| 286 |
-
โ removed โ ๋ฐ๋์ฒด ๋ถ๋ฌธ ์์ต์ฑ ์
ํ ์ฐ๋ ค โฆ โ
|
| 287 |
-
โโโโโโโโโโโโดโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 288 |
-
```
|
| 289 |
-
|
| 290 |
-
### Finance
|
| 291 |
-
|
| 292 |
-
```python
|
| 293 |
-
c.BS # balance sheet (account ร period, newest first)
|
| 294 |
-
c.IS # income statement
|
| 295 |
-
c.CF # cash flow
|
| 296 |
-
c.ratios # ratio time series DataFrame (6 categories ร period)
|
| 297 |
-
c.finance.ratioSeries # ratio time series across years
|
| 298 |
-
c.finance.timeseries # raw account time series
|
| 299 |
-
c.annual # annual time series
|
| 300 |
-
c.filings() # disclosure document list (Tier 1 Stable)
|
| 301 |
-
```
|
| 302 |
-
|
| 303 |
-
All accounts are normalized through the 4-step standardization pipeline โ Samsung's `revenue` and LG's `revenue` are the same `snakeId`. Ratios cover 6 categories: profitability, stability, growth, efficiency, cashflow, and valuation.
|
| 304 |
-
|
| 305 |
-
### Market-wide Financial Screening
|
| 306 |
-
|
| 307 |
-
Scan a single account or ratio across **all listed companies** in one call โ 2,700+ DART firms or 500+ EDGAR firms. Returns a wide Polars DataFrame (rows = companies, columns = periods, newest first).
|
| 308 |
-
|
| 309 |
-
```python
|
| 310 |
-
import dartlab
|
| 311 |
-
|
| 312 |
-
# scan a single account across all listed companies
|
| 313 |
-
dartlab.scanAccount("๋งค์ถ์ก") # revenue, quarterly standalone
|
| 314 |
-
dartlab.scanAccount("operating_profit", annual=True) # annual basis
|
| 315 |
-
dartlab.scanAccount("total_assets", market="edgar") # US EDGAR
|
| 316 |
-
|
| 317 |
-
# scan a ratio across all listed companies
|
| 318 |
-
dartlab.scanRatio("roe") # quarterly ROE for all firms
|
| 319 |
-
dartlab.scanRatio("debtRatio", annual=True) # annual debt-to-equity
|
| 320 |
-
|
| 321 |
-
# list available ratios (13 ratios: profitability, stability, growth, efficiency, cashflow)
|
| 322 |
-
dartlab.scanRatioList()
|
| 323 |
-
```
|
| 324 |
-
|
| 325 |
-
Accepts both Korean names (`매출액`) and English snakeIds (`sales`) — same 4-step normalization as Company finance. Reads 2,700+ parquet files in parallel via ThreadPool, typically completes in ~3 seconds.
|
| 326 |
-
|
| 327 |
-
> **Requires pre-downloaded data.** Market-wide functions (`scanAccount`, `screen`, `digest`, etc.) operate on local data โ individual `Company()` calls only download one firm at a time. Download all data first:
|
| 328 |
-
> ```python
|
| 329 |
-
> pip install dartlab[hf]
|
| 330 |
-
> dartlab.downloadAll("finance") # ~600 MB, 2,700+ firms
|
| 331 |
-
> dartlab.downloadAll("report") # ~320 MB (governance/workforce/capital/debt)
|
| 332 |
-
> dartlab.downloadAll("docs") # ~8 GB (digest/signal โ large)
|
| 333 |
-
> ```
|
| 334 |
-
|
| 335 |
-
## Review โ Structured Company Analysis
|
| 336 |
-
|
| 337 |
-
> **Experimental** โ the review system is under active development. Templates, blocks, and output formats may change between versions.
|
| 338 |
-
|
| 339 |
-
DartLab's review system assembles financial data into structured, readable reports.
|
| 340 |
-
|
| 341 |
-
### Templates
|
| 342 |
-
|
| 343 |
-
Pre-built block combinations that cover key analysis areas:
|
| 344 |
-
|
| 345 |
-
```python
|
| 346 |
-
c = dartlab.Company("005930")
|
| 347 |
-
|
| 348 |
-
c.review("수익구조") # revenue structure — segments, growth, concentration
|
| 349 |
-
c.review("자금조달") # capital structure — debt, liquidity, interest burden
|
| 350 |
-
c.review() # all templates
|
| 351 |
-
```
|
| 352 |
-
|
| 353 |
-
### Block Assembly
|
| 354 |
-
|
| 355 |
-
Every review is built from reusable blocks. Get the full block dictionary and assemble your own:
|
| 356 |
-
|
| 357 |
-
```python
|
| 358 |
-
from dartlab.review import blocks, Review
|
| 359 |
-
|
| 360 |
-
b = blocks(c) # dict of 16 pre-built blocks
|
| 361 |
-
list(b.keys()) # โ ["profile", "segmentComposition", "growth", ...]
|
| 362 |
-
|
| 363 |
-
# pick what you need
|
| 364 |
-
Review([
|
| 365 |
-
b["segmentComposition"],
|
| 366 |
-
b["growth"],
|
| 367 |
-
c.select("IS", ["๋งค์ถ์ก"]), # mix with raw data
|
| 368 |
-
])
|
| 369 |
-
```
|
| 370 |
-
|
| 371 |
-
### Reviewer โ AI Layer
|
| 372 |
-
|
| 373 |
-
Add LLM-powered opinions on top of data blocks. Works with any provider:
|
| 374 |
-
|
| 375 |
-
```python
|
| 376 |
-
c.reviewer() # all sections + AI opinion
|
| 377 |
-
c.reviewer("์์ต๊ตฌ์กฐ") # single section + AI
|
| 378 |
-
c.reviewer(guide="Evaluate from semiconductor cycle perspective") # custom guide
|
| 379 |
-
```
|
| 380 |
-
|
| 381 |
-
**Free AI providers** โ no paid API key required:
|
| 382 |
-
|
| 383 |
-
| Provider | Setup |
|
| 384 |
-
|----------|-------|
|
| 385 |
-
| Gemini | `dartlab setup gemini` |
|
| 386 |
-
| Groq | `dartlab setup groq` |
|
| 387 |
-
| Cerebras | `dartlab setup cerebras` |
|
| 388 |
-
| Mistral | `dartlab setup mistral` |
|
| 389 |
-
|
| 390 |
-
Or use any OpenAI-compatible endpoint:
|
| 391 |
-
```bash
|
| 392 |
-
dartlab setup custom --base-url http://localhost:11434/v1 # Ollama local
|
| 393 |
-
```
|
| 394 |
-
|
| 395 |
-
### Customization
|
| 396 |
-
|
| 397 |
-
- **Templates**: Pre-defined block combinations (`์์ต๊ตฌ์กฐ`, `์๊ธ์กฐ๋ฌ`)
|
| 398 |
-
- **Free assembly**: Mix any blocks + raw DataFrames in `Review([...])`
|
| 399 |
-
- **Guide**: Pass `guide="..."` to `c.reviewer()` for domain-specific AI analysis
|
| 400 |
-
- **Layout**: `ReviewLayout(indentH1=2, gapAfterH1=1, ...)` for rendering control
|
| 401 |
-
- **Render formats**: `review.render("rich" | "html" | "markdown" | "json")`
|
| 402 |
-
|
| 403 |
-
See [notebooks/marimo/sampleReview.py](notebooks/marimo/sampleReview.py) for interactive examples.
|
| 404 |
-
|
| 405 |
-
## Additional Features
|
| 406 |
-
|
| 407 |
-
> Features below are **beta** or **experimental** โ APIs may change. See [stability](docs/stability.md).
|
| 408 |
-
|
| 409 |
-
### Insights (beta)
|
| 410 |
-
|
| 411 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 412 |
-
|
| 413 |
-
```python
|
| 414 |
-
c.insights # 10-area analysis
|
| 415 |
-
c.insights.grades() # โ {"performance": "A", "profitability": "B", โฆ}
|
| 416 |
-
c.insights.performance.grade # โ "A"
|
| 417 |
-
c.insights.performance.details # โ ["Revenue growth +8.3%", โฆ]
|
| 418 |
-
c.insights.anomalies # โ outliers and red flags
|
| 419 |
-
|
| 420 |
-
# distress scorecard โ 6-model bankruptcy/fraud prediction
|
| 421 |
-
c.insights.distress # Altman Z-Score, Beneish M-Score, Ohlson O-Score,
|
| 422 |
-
# Merton Distance-to-Default, Piotroski F-Score, Sloan Ratio
|
| 423 |
-
```
|
| 424 |
-
|
| 425 |
-
### Valuation, Forecast & Simulation
|
| 426 |
-
|
| 427 |
-
```python
|
| 428 |
-
dartlab.valuation("005930") # DCF + DDM + relative valuation
|
| 429 |
-
dartlab.forecast("005930") # revenue forecast (4-source ensemble)
|
| 430 |
-
dartlab.simulation("005930") # scenario simulation (macro presets)
|
| 431 |
-
|
| 432 |
-
# also available as Company methods
|
| 433 |
-
c.valuation()
|
| 434 |
-
c.forecast(horizon=3)
|
| 435 |
-
c.simulation(scenarios=["adverse", "rate_hike"])
|
| 436 |
-
```
|
| 437 |
-
|
| 438 |
-
Auto-detects currency โ KRW for DART companies, USD for EDGAR. Works with both `dartlab.valuation("AAPL")` and `dartlab.valuation("005930")`.
|
| 439 |
-
|
| 440 |
-
### Audit (beta)
|
| 441 |
-
|
| 442 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 443 |
-
|
| 444 |
-
```python
|
| 445 |
-
dartlab.audit("005930") # 11 red flag detectors
|
| 446 |
-
|
| 447 |
-
# Benford's Law (digit distribution), auditor change (PCAOB AS 3101),
|
| 448 |
-
# going concern (ISA 570), internal control (SOX 302/404),
|
| 449 |
-
# revenue quality (Dechow & Dichev), Merton default probability, ...
|
| 450 |
-
```
|
| 451 |
-
|
| 452 |
-
### Market Intelligence (beta)
|
| 453 |
-
|
| 454 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 455 |
-
|
| 456 |
-
```python
|
| 457 |
-
dartlab.digest() # market-wide disclosure change digest
|
| 458 |
-
dartlab.digest(sector="๋ฐ๋์ฒด") # sector filter
|
| 459 |
-
dartlab.groupHealth() # group health: network ร financial ratios
|
| 460 |
-
```
|
| 461 |
-
|
| 462 |
-
### Modules
|
| 463 |
-
|
| 464 |
-
DartLab exposes 100+ modules across 6 categories:
|
| 465 |
-
|
| 466 |
-
```bash
|
| 467 |
-
dartlab modules # list all modules
|
| 468 |
-
dartlab modules --category finance # filter by category
|
| 469 |
-
dartlab modules --search dividend # search by keyword
|
| 470 |
-
```
|
| 471 |
-
|
| 472 |
-
```python
|
| 473 |
-
c.topics # list all available topics for this company
|
| 474 |
-
```
|
| 475 |
-
|
| 476 |
-
Categories: `finance` (statements, ratios), `report` (dividend, governance, audit), `notes` (K-IFRS annotations), `disclosure` (narrative text), `analysis` (insights, rankings), `raw` (original parquets).
|
| 477 |
-
|
| 478 |
-
### Charts & Visualization (beta)
|
| 479 |
-
|
| 480 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 481 |
-
|
| 482 |
-
```python
|
| 483 |
-
c = dartlab.Company("005930")
|
| 484 |
-
|
| 485 |
-
# one-liner Plotly charts
|
| 486 |
-
dartlab.chart.revenue(c).show() # revenue + operating margin combo
|
| 487 |
-
dartlab.chart.cashflow(c).show() # operating/investing/financing CF
|
| 488 |
-
dartlab.chart.dividend(c).show() # DPS + yield + payout ratio
|
| 489 |
-
dartlab.chart.profitability(c).show() # ROE, operating margin, net margin
|
| 490 |
-
|
| 491 |
-
# auto-detect all available charts
|
| 492 |
-
specs = dartlab.chart.auto_chart(c)
|
| 493 |
-
dartlab.chart.chart_from_spec(specs[0]).show()
|
| 494 |
-
|
| 495 |
-
# generic charts from any DataFrame
|
| 496 |
-
dartlab.chart.line(c.dividend, y=["dps"])
|
| 497 |
-
dartlab.chart.bar(df, x="year", y=["revenue", "operating_income"], stacked=True)
|
| 498 |
-
```
|
| 499 |
-
|
| 500 |
-
Data tools:
|
| 501 |
-
|
| 502 |
-
```python
|
| 503 |
-
dartlab.table.yoy_change(c.dividend, value_cols=["dps"]) # add YoY% columns
|
| 504 |
-
dartlab.table.format_korean(c.BS, unit="๋ฐฑ๋ง์") # 1.2์กฐ์, 350์ต์
|
| 505 |
-
dartlab.table.summary_stats(c.dividend, value_cols=["dps"]) # mean/CAGR/trend
|
| 506 |
-
dartlab.text.extract_keywords(narrative) # frequency-based keywords
|
| 507 |
-
dartlab.text.sentiment_indicators(narrative) # positive/negative/risk
|
| 508 |
-
```
|
| 509 |
-
|
| 510 |
-
Install chart dependencies: `uv add "dartlab[charts]"`
|
| 511 |
-
|
| 512 |
-
### Network โ Affiliate Map (beta)
|
| 513 |
-
|
| 514 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 515 |
-
|
| 516 |
-
```python
|
| 517 |
-
c = dartlab.Company("005930")
|
| 518 |
-
|
| 519 |
-
# interactive vis.js graph in browser
|
| 520 |
-
c.network().show() # ego view (1 hop)
|
| 521 |
-
c.network(hops=2).show() # 2-hop neighborhood
|
| 522 |
-
|
| 523 |
-
# DataFrame views
|
| 524 |
-
c.network("members") # group affiliates
|
| 525 |
-
c.network("edges") # investment/shareholder connections
|
| 526 |
-
c.network("cycles") # circular ownership paths
|
| 527 |
-
|
| 528 |
-
# full market network
|
| 529 |
-
dartlab.network().show()
|
| 530 |
-
```
|
| 531 |
-
|
| 532 |
-
### Market Scan (beta)
|
| 533 |
-
|
| 534 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 535 |
-
|
| 536 |
-
```python
|
| 537 |
-
c = dartlab.Company("005930")
|
| 538 |
-
|
| 539 |
-
# one company โ market-wide
|
| 540 |
-
c.governance() # single company
|
| 541 |
-
c.governance("all") # full market DataFrame
|
| 542 |
-
dartlab.governance() # module-level scan
|
| 543 |
-
dartlab.workforce()
|
| 544 |
-
dartlab.capital()
|
| 545 |
-
dartlab.debt()
|
| 546 |
-
|
| 547 |
-
# screening & benchmarking
|
| 548 |
-
dartlab.screen() # multi-factor screening
|
| 549 |
-
dartlab.benchmark() # peer comparison
|
| 550 |
-
dartlab.signal() # change detection signals
|
| 551 |
-
```
|
| 552 |
-
|
| 553 |
-
### Market Data Collection (beta)
|
| 554 |
-
|
| 555 |
-
> **Beta** โ API may change after a warning. See [stability](docs/stability.md).
|
| 556 |
-
|
| 557 |
-
The Gather engine collects external market data as **Polars DataFrames** โ timeseries by default. Every request goes through automatic fallback chains, circuit breaker isolation, and TTL caching. All methods are synchronous โ async parallel execution is handled internally.
|
| 558 |
-
|
| 559 |
-
```python
|
| 560 |
-
import dartlab
|
| 561 |
-
|
| 562 |
-
# OHLCV timeseries — adjusted prices, 6000+ trading days in a single request
|
| 563 |
-
dartlab.price("005930") # KR: 1-year default, Polars DataFrame
|
| 564 |
-
dartlab.price("005930", start="2015-01-01") # custom range
|
| 565 |
-
dartlab.price("AAPL", market="US") # US via Yahoo Finance chart API
|
| 566 |
-
dartlab.price("005930", snapshot=True) # opt-in: current price snapshot
|
| 567 |
-
|
| 568 |
-
# supply/demand flow timeseries (KR only)
|
| 569 |
-
dartlab.flow("005930") # DataFrame (date, foreignNet, institutionNet, ...)
|
| 570 |
-
|
| 571 |
-
# macro indicators — full wide DataFrame
|
| 572 |
-
dartlab.macro() # KR 12 indicators (CPI, rates, FX, production, ...)
|
| 573 |
-
dartlab.macro("US") # US 25 indicators (GDP, CPI, Fed Funds, S&P500, ...)
|
| 574 |
-
dartlab.macro("CPI") # single indicator (auto-detects KR)
|
| 575 |
-
dartlab.macro("FEDFUNDS") # single indicator (auto-detects US)
|
| 576 |
-
|
| 577 |
-
# consensus, news
|
| 578 |
-
dartlab.consensus("005930") # target price & analyst opinion
|
| 579 |
-
dartlab.news("삼성전자") # Google News RSS → DataFrame
|
| 580 |
-
```
|
| 581 |
-
|
| 582 |
-
**How data is collected — don't worry, it's safe:**
|
| 583 |
-
|
| 584 |
-
| Source | Data | Method |
|
| 585 |
-
|--------|------|--------|
|
| 586 |
-
| Naver Chart API | KR OHLCV (adjusted prices) | `fchart.stock.naver.com` — 1 request per stock, max 6000 days |
|
| 587 |
-
| Yahoo Finance v8 | US/Global OHLCV | `query2.finance.yahoo.com/v8/finance/chart` — public chart API |
|
| 588 |
-
| ECOS (Bank of Korea) | KR macro indicators | Official API with user's own key |
|
| 589 |
-
| FRED (St. Louis Fed) | US macro indicators | Official API with user's own key |
|
| 590 |
-
| Naver Mobile API | Consensus, flow, sector PER | `m.stock.naver.com/api` — JSON endpoints |
|
| 591 |
-
| FMP | Fallback for US history | Financial Modeling Prep API (optional) |
|
| 592 |
-
|
| 593 |
-
**Safety infrastructure:**
|
| 594 |
-
|
| 595 |
-
- **Rate limiting** — per-domain RPM caps (Naver 30, ECOS 30, FRED 120) with async queue
|
| 596 |
-
- **Circuit breaker** — 3 consecutive failures → source disabled for 60s, half-open retry
|
| 597 |
-
- **Fallback chains** — KR: naver → yahoo_direct → yahoo / US: yahoo_direct → fmp → yahoo
|
| 598 |
-
- **Stale-while-revalidate** — returns cached data on failure, warns via `log.warning`
|
| 599 |
-
- **User-Agent rotation** — randomized per request to avoid fingerprinting
|
| 600 |
-
- **No silent failures** — all API errors logged at warning level, never swallowed
|
| 601 |
-
- **No scraping** — all sources are public APIs or official data endpoints
|
| 602 |
-
|
| 603 |
-
### Cross-Border Analysis (beta)
|
| 604 |
-
|
| 605 |
-
> **Beta** — API may change after a warning. See [stability](docs/stability.md).
|
| 606 |
-
|
| 607 |
-
```python
|
| 608 |
-
c = dartlab.Company("005930")
|
| 609 |
-
|
| 610 |
-
# keyword frequency across disclosure periods
|
| 611 |
-
c.keywordTrend(keyword="AI") # topic × period × keyword count
|
| 612 |
-
c.keywordTrend() # all 54 built-in keywords
|
| 613 |
-
|
| 614 |
-
# news headlines
|
| 615 |
-
c.news() # recent 30 days
|
| 616 |
-
dartlab.news("AAPL", market="US") # US company news
|
| 617 |
-
|
| 618 |
-
# global peer mapping (WICS → GICS sector)
|
| 619 |
-
dartlab.crossBorderPeers("005930") # → ["AAPL", "MSFT", "NVDA", "TSM", "AVGO"]
|
| 620 |
-
|
| 621 |
-
# currency conversion (FRED-based)
|
| 622 |
-
from dartlab.engines.common.finance import getExchangeRate, convertValue
|
| 623 |
-
getExchangeRate("KRW") # KRW/USD rate
|
| 624 |
-
convertValue(1_000_000, "KRW", "USD") # → ~730.0
|
| 625 |
-
|
| 626 |
-
# audit opinion normalization (KR/EN/JP → canonical code)
|
| 627 |
-
from dartlab.engines.common.audit import normalizeAuditOpinion
|
| 628 |
-
normalizeAuditOpinion("적정") # → "unqualified"
|
| 629 |
-
normalizeAuditOpinion("Qualified") # → "qualified"
|
| 630 |
-
```
|
| 631 |
-
|
| 632 |
-
Disclosure gap detection runs automatically inside `c.insights` — flags mismatches between text changes and financial health (e.g. risk text surges while financials are stable).
|
| 633 |
-
|
| 634 |
-
### Export (experimental)
|
| 635 |
-
|
| 636 |
-
> **Experimental** — Breaking changes possible. Not for production.
|
| 637 |
-
|
| 638 |
-
```bash
|
| 639 |
-
dartlab excel "005930" -o samsung.xlsx
|
| 640 |
-
```
|
| 641 |
-
|
| 642 |
-
Install: `uv add "dartlab[ai]"` (Excel export is included in the AI extras).
|
| 643 |
-
|
| 644 |
-
### Plugins
|
| 645 |
-
|
| 646 |
-
```python
|
| 647 |
-
dartlab.plugins() # list loaded plugins
|
| 648 |
-
dartlab.reload_plugins() # rescan after installing a plugin
|
| 649 |
-
```
|
| 650 |
-
|
| 651 |
-
Plugins can extend DartLab with custom data sources, tools, or analysis engines. See `dartlab plugin create --help` for scaffolding.
|
| 652 |
-
|
| 653 |
-
## EDGAR (US)
|
| 654 |
-
|
| 655 |
-
Same `Company` interface, same account standardization pipeline, different data source. EDGAR data is auto-fetched from the SEC API — no pre-download needed:
|
| 656 |
-
|
| 657 |
-
```python
|
| 658 |
-
us = dartlab.Company("AAPL")
|
| 659 |
-
|
| 660 |
-
us.sections # 10-K/10-Q sections with heading/body
|
| 661 |
-
us.show("business") # business description
|
| 662 |
-
us.show("10-K::item1ARiskFactors") # risk factors
|
| 663 |
-
us.BS # SEC XBRL balance sheet
|
| 664 |
-
us.ratios # same 47 ratios
|
| 665 |
-
us.diff("10-K::item7Mdna") # MD&A text changes
|
| 666 |
-
us.insights # 10-area grades (A~F)
|
| 667 |
-
|
| 668 |
-
# analyst functions — auto-detect USD
|
| 669 |
-
dartlab.valuation("AAPL") # DCF + DDM + relative (USD)
|
| 670 |
-
dartlab.forecast("AAPL") # revenue forecast (USD)
|
| 671 |
-
dartlab.simulation("AAPL") # scenario simulation (US macro presets)
|
| 672 |
-
```
|
| 673 |
-
|
| 674 |
-
The interface is identical — same methods, same structure:
|
| 675 |
-
|
| 676 |
-
```python
|
| 677 |
-
# Korea (DART) # US (EDGAR)
|
| 678 |
-
c = dartlab.Company("005930") c = dartlab.Company("AAPL")
|
| 679 |
-
c.sections c.sections
|
| 680 |
-
c.show("businessOverview") c.show("business")
|
| 681 |
-
c.BS c.BS
|
| 682 |
-
c.ratios c.ratios
|
| 683 |
-
c.diff("businessOverview") c.diff("10-K::item7Mdna")
|
| 684 |
-
c.insights.grades() c.insights.grades()
|
| 685 |
-
```
|
| 686 |
-
|
| 687 |
-
### DART vs EDGAR Namespaces
|
| 688 |
-
|
| 689 |
-
| | DART | EDGAR |
|
| 690 |
-
|---------------|:--------------:|:--------------:|
|
| 691 |
-
| `docs` | ✓ | ✓ |
|
| 692 |
-
| `finance` | ✓ | ✓ |
|
| 693 |
-
| `report` | ✓ (28 API types) | ✗ (not applicable) |
|
| 694 |
-
| `profile` | ✓ | ✓ |
|
| 695 |
-
|
| 696 |
-
DART has a `report` namespace with 28 structured disclosure APIs (dividend, governance, executive compensation, etc.). This does not exist in EDGAR — SEC filings are structured differently.
|
| 697 |
-
|
| 698 |
-
**EDGAR topic naming**: Topics use `{formType}::{itemId}` format. Short aliases also work:
|
| 699 |
-
|
| 700 |
-
```python
|
| 701 |
-
us.show("10-K::item1Business") # full form
|
| 702 |
-
us.show("business") # short alias
|
| 703 |
-
us.show("risk") # → 10-K::item1ARiskFactors
|
| 704 |
-
us.show("mdna") # → 10-K::item7Mdna
|
| 705 |
-
```
|
| 706 |
-
|
| 707 |
-
## AI Analysis
|
| 708 |
-
|
| 709 |
-
> **Experimental** — the AI analysis layer and `analysis/` engines are under active development. APIs, output formats, and available tools may change between versions.
|
| 710 |
-
|
| 711 |
-
> **Tip:** New to financial analysis or prefer natural language? Use `dartlab.ask()` — the AI assistant handles everything from data download to analysis. No coding knowledge required.
|
| 712 |
-
|
| 713 |
-
DartLab includes a built-in AI analysis layer that feeds structured company data to LLMs. **No code required** — you can ask questions in plain language and DartLab handles everything: data selection, context assembly, and streaming the answer.
|
| 714 |
-
|
| 715 |
-
```bash
|
| 716 |
-
# terminal one-liner — no Python needed
|
| 717 |
-
dartlab ask "삼성전자 재무건전성 분석해줘"
|
| 718 |
-
```
|
| 719 |
-
|
| 720 |
-
DartLab structures the data, selects relevant context (financials, insights, sector benchmarks), and lets the LLM explain:
|
| 721 |
-
|
| 722 |
-
```
|
| 723 |
-
$ dartlab ask "삼성전자 재무건전성 분석해줘"
|
| 724 |
-
|
| 725 |
-
삼성전자의 재무건전성은 A등급입니다.
|
| 726 |
-
|
| 727 |
-
▸ 부채비율 31.8% — 업종 평균(45.2%) 대비 양호
|
| 728 |
-
▸ 유동비율 258.6% — 200% 안전 기준 상회
|
| 729 |
-
▸ 이자보상배수 22.1배 — 이자 부담 매우 낮음
|
| 730 |
-
▸ ROE 회복세: 1.6% → 10.2% (4분기 연속 개선)
|
| 731 |
-
|
| 732 |
-
[데이터 출처: 2024Q4 사업보고서, dartlab insights 엔진]
|
| 733 |
-
```
|
| 734 |
-
|
| 735 |
-
For real-time market-wide disclosure questions (e.g. "최근 7일 수주공시 알려줘"), the AI uses your `OpenDART API key` to search recent filings directly. Store the key in project `.env` or via UI Settings.
|
| 736 |
-
|
| 737 |
-
The 2-tier architecture means basic analysis works with any provider, while tool-calling providers (OpenAI, Claude) can go deeper by requesting additional data mid-conversation.
|
| 738 |
-
|
| 739 |
-
### Python API
|
| 740 |
-
|
| 741 |
-
```python
|
| 742 |
-
import dartlab
|
| 743 |
-
|
| 744 |
-
# streams to stdout, returns full text
|
| 745 |
-
answer = dartlab.ask("삼성전자 재무건전성 분석해줘")
|
| 746 |
-
|
| 747 |
-
# provider + model override
|
| 748 |
-
answer = dartlab.ask("삼성전자 분석", provider="openai", model="gpt-4o")
|
| 749 |
-
|
| 750 |
-
# data filtering
|
| 751 |
-
answer = dartlab.ask("삼성전자 핵심 포인트", include=["BS", "IS"])
|
| 752 |
-
|
| 753 |
-
# analysis pattern (framework-guided)
|
| 754 |
-
answer = dartlab.ask("삼성전자 분석", pattern="financial")
|
| 755 |
-
|
| 756 |
-
# agent mode — LLM selects tools for deeper analysis
|
| 757 |
-
answer = dartlab.chat("005930", "배당 추세를 분석하고 이상 징후를 찾아줘")
|
| 758 |
-
```
|
| 759 |
-
|
| 760 |
-
### CLI
|
| 761 |
-
|
| 762 |
-
```bash
|
| 763 |
-
# provider setup — free providers first
|
| 764 |
-
dartlab setup # list all providers
|
| 765 |
-
dartlab setup gemini # Google Gemini (free)
|
| 766 |
-
dartlab setup groq # Groq (free)
|
| 767 |
-
|
| 768 |
-
# status
|
| 769 |
-
dartlab status # all providers (table view)
|
| 770 |
-
dartlab status --cost # cumulative token/cost stats
|
| 771 |
-
|
| 772 |
-
# ask questions (streaming by default)
|
| 773 |
-
dartlab ask "삼성전자 재무건전성 분석해줘"
|
| 774 |
-
dartlab ask "AAPL risk analysis" -p ollama
|
| 775 |
-
dartlab ask --continue "배당 추세는?"
|
| 776 |
-
|
| 777 |
-
# auto-generate report
|
| 778 |
-
dartlab report "삼성전자" -o report.md
|
| 779 |
-
|
| 780 |
-
# web UI
|
| 781 |
-
dartlab # open browser UI
|
| 782 |
-
dartlab --help # show all commands
|
| 783 |
-
```
|
| 784 |
-
|
| 785 |
-
<details>
|
| 786 |
-
<summary>All CLI commands (16)</summary>
|
| 787 |
-
|
| 788 |
-
| Category | Command | Description |
|
| 789 |
-
|----------|---------|-------------|
|
| 790 |
-
| Data | `show` | Open any topic by name |
|
| 791 |
-
| Data | `search` | Find companies by name or code |
|
| 792 |
-
| Data | `statement` | BS / IS / CF / SCE output |
|
| 793 |
-
| Data | `sections` | Raw docs sections |
|
| 794 |
-
| Data | `profile` | Company index and facts |
|
| 795 |
-
| Data | `modules` | List all available modules |
|
| 796 |
-
| AI | `ask` | Natural language question |
|
| 797 |
-
| AI | `report` | Auto-generate analysis report |
|
| 798 |
-
| Export | `excel` | Export to Excel (experimental) |
|
| 799 |
-
| Collect | `collect` | Download / refresh / batch collect |
|
| 800 |
-
| Collect | `collect --check` | Check freshness (new filings) |
|
| 801 |
-
| Collect | `collect --incremental` | Incremental collect (missing only) |
|
| 802 |
-
| Server | `ai` | Launch web UI (localhost:8400) |
|
| 803 |
-
| Server | `share` | Tunnel sharing (ngrok / cloudflared) |
|
| 804 |
-
| Server | `status` | Provider connection status |
|
| 805 |
-
| Server | `setup` | Provider setup wizard |
|
| 806 |
-
| MCP | `mcp` | Start MCP stdio server |
|
| 807 |
-
| Plugin | `plugin` | Create / list plugins |
|
| 808 |
-
|
| 809 |
-
</details>
|
| 810 |
-
|
| 811 |
-
### Providers
|
| 812 |
-
|
| 813 |
-
**Free API key providers** — sign up, paste the key, start analyzing:
|
| 814 |
-
|
| 815 |
-
| Provider | Free Tier | Model | Setup |
|
| 816 |
-
|----------|-----------|-------|-------|
|
| 817 |
-
| `gemini` | Gemini 2.5 Pro/Flash free | Gemini 2.5 | `dartlab setup gemini` |
|
| 818 |
-
| `groq` | 6K–30K TPM free | LLaMA 3.3 70B | `dartlab setup groq` |
|
| 819 |
-
| `cerebras` | 1M tokens/day permanent | LLaMA 3.3 70B | `dartlab setup cerebras` |
|
| 820 |
-
| `mistral` | 1B tokens/month free | Mistral Small | `dartlab setup mistral` |
|
| 821 |
-
|
| 822 |
-
**Other providers:**
|
| 823 |
-
|
| 824 |
-
| Provider | Auth | Cost | Tool Calling |
|
| 825 |
-
|----------|------|------|:---:|
|
| 826 |
-
| `oauth-codex` | ChatGPT subscription (Plus/Team/Enterprise) | Included in subscription | Yes |
|
| 827 |
-
| `openai` | API key (`OPENAI_API_KEY`) | Pay-per-token | Yes |
|
| 828 |
-
| `ollama` | Local install, no account needed | Free | Depends on model |
|
| 829 |
-
| `codex` | Codex CLI installed locally | Free (uses your Codex session) | Yes |
|
| 830 |
-
| `custom` | Any OpenAI-compatible endpoint | Varies | Varies |
|
| 831 |
-
|
| 832 |
-
**Auto-fallback:** Set multiple free API keys and DartLab automatically switches to the next provider when one hits its rate limit. Use `provider="free"` to enable the fallback chain:
|
| 833 |
-
|
| 834 |
-
```python
|
| 835 |
-
dartlab.ask("삼성전자 분석", provider="free")
|
| 836 |
-
```
|
| 837 |
-
|
| 838 |
-
**Why no Claude provider?** Anthropic does not offer OAuth-based access. Without OAuth, there is no way to let users authenticate with their existing subscription — we would have to ask users to paste API keys, which goes against DartLab's frictionless design. If Anthropic adds OAuth support in the future, we will add a Claude provider. For now, Claude works through **MCP** (see below) — Claude Desktop, Claude Code, and Cursor can call DartLab's 60 tools directly.
|
| 839 |
-
|
| 840 |
-
**`oauth-codex`** is the recommended provider — if you have a ChatGPT subscription, it works out of the box with no API keys. Run `dartlab setup oauth-codex` to authenticate.
|
| 841 |
-
|
| 842 |
-
**Web UI (`dartlab`)** launches a browser-based chat interface for interactive analysis. This feature is currently **experimental** — we are evaluating the right scope and UX for visualization and collaborative features.
|
| 843 |
-
|
| 844 |
-
Install AI dependencies: `uv add "dartlab[ai]"`
|
| 845 |
-
|
| 846 |
-
### Project Settings (`.dartlab.yml`)
|
| 847 |
-
|
| 848 |
-
```yaml
|
| 849 |
-
company: 005930 # default company
|
| 850 |
-
provider: openai # default LLM provider
|
| 851 |
-
model: gpt-4o # default model
|
| 852 |
-
verbose: false
|
| 853 |
-
```
|
| 854 |
-
|
| 855 |
-
## MCP — AI Assistant Integration
|
| 856 |
-
|
| 857 |
-
DartLab includes a built-in [MCP](https://modelcontextprotocol.io/) server that exposes 60 tools (16 global + 44 per-company) to Claude Desktop, Claude Code, Cursor, and any MCP-compatible client.
|
| 858 |
-
|
| 859 |
-
```bash
|
| 860 |
-
uv add "dartlab[mcp]"
|
| 861 |
-
```
|
| 862 |
-
|
| 863 |
-
### Claude Desktop
|
| 864 |
-
|
| 865 |
-
Add to `claude_desktop_config.json`:
|
| 866 |
-
|
| 867 |
-
```json
|
| 868 |
-
{
|
| 869 |
-
"mcpServers": {
|
| 870 |
-
"dartlab": {
|
| 871 |
-
"command": "uv",
|
| 872 |
-
"args": ["run", "dartlab", "mcp"]
|
| 873 |
-
}
|
| 874 |
-
}
|
| 875 |
-
}
|
| 876 |
-
```
|
| 877 |
-
|
| 878 |
-
### Claude Code
|
| 879 |
-
|
| 880 |
-
```bash
|
| 881 |
-
claude mcp add dartlab -- uv run dartlab mcp
|
| 882 |
-
```
|
| 883 |
-
|
| 884 |
-
Or add to `~/.claude/settings.json`:
|
| 885 |
-
|
| 886 |
-
```json
|
| 887 |
-
{
|
| 888 |
-
"mcpServers": {
|
| 889 |
-
"dartlab": {
|
| 890 |
-
"command": "uv",
|
| 891 |
-
"args": ["run", "dartlab", "mcp"]
|
| 892 |
-
}
|
| 893 |
-
}
|
| 894 |
-
}
|
| 895 |
-
```
|
| 896 |
-
|
| 897 |
-
### Cursor
|
| 898 |
-
|
| 899 |
-
Add to `.cursor/mcp.json` with the same config format as Claude Desktop.
|
| 900 |
-
|
| 901 |
-
### What's Available
|
| 902 |
-
|
| 903 |
-
Once connected, your AI assistant can:
|
| 904 |
-
|
| 905 |
-
- **Search** — find companies by name or code (`search_company`)
|
| 906 |
-
- **Show** — read any disclosure topic (`show_topic`, `list_topics`, `diff_topic`)
|
| 907 |
-
- **Finance** — balance sheet, income statement, cash flow, ratios (`get_financial_statements`, `get_ratios`)
|
| 908 |
-
- **Analysis** — insights, sector ranking, valuation (`get_insight`, `get_ranking`)
|
| 909 |
-
- **EDGAR** — same tools work for US companies (`stock_code: "AAPL"`)
|
| 910 |
-
|
| 911 |
-
Auto-generate config for your platform:
|
| 912 |
-
|
| 913 |
-
```bash
|
| 914 |
-
dartlab mcp --config claude-desktop
|
| 915 |
-
dartlab mcp --config claude-code
|
| 916 |
-
dartlab mcp --config cursor
|
| 917 |
-
```
|
| 918 |
-
|
| 919 |
-
## OpenAPI — Raw Public APIs
|
| 920 |
-
|
| 921 |
-
Use source-native wrappers when you want raw disclosure APIs directly.
|
| 922 |
-
|
| 923 |
-
### OpenDart (Korea)
|
| 924 |
-
|
| 925 |
-
> **Note:** `Company` does **not** require an API key — it uses pre-built datasets.
|
| 926 |
-
> `OpenDart` uses the raw DART API and requires a key from [opendart.fss.or.kr](https://opendart.fss.or.kr) (free).
|
| 927 |
-
> Recent filing-list AI questions across the whole market also use this key. In the UI, open Settings and manage `OpenDART API key` there.
|
| 928 |
-
|
| 929 |
-
```python
|
| 930 |
-
from dartlab import OpenDart
|
| 931 |
-
|
| 932 |
-
d = OpenDart()
|
| 933 |
-
d.search("카카오", listed=True)
|
| 934 |
-
d.filings("삼성전자", "2024")
|
| 935 |
-
d.finstate("삼성전자", 2024)
|
| 936 |
-
d.report("삼성전자", "배당", 2024)
|
| 937 |
-
```
|
| 938 |
-
|
| 939 |
-
### OpenEdgar (US)
|
| 940 |
-
|
| 941 |
-
> **No API key required.** SEC EDGAR is a public API — no registration needed.
|
| 942 |
-
|
| 943 |
-
```python
|
| 944 |
-
from dartlab import OpenEdgar
|
| 945 |
-
|
| 946 |
-
e = OpenEdgar()
|
| 947 |
-
e.search("Apple")
|
| 948 |
-
e.filings("AAPL", forms=["10-K", "10-Q"])
|
| 949 |
-
e.companyFactsJson("AAPL")
|
| 950 |
-
```
|
| 951 |
-
|
| 952 |
-
## Data
|
| 953 |
-
|
| 954 |
-
**No manual setup required.** When you create a `Company`, dartlab automatically downloads the required data.
|
| 955 |
-
|
| 956 |
-
| Dataset | Coverage | Size | Source |
|
| 957 |
-
|---------|----------|------|--------|
|
| 958 |
-
| DART docs | 2,500+ companies | ~8 GB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/docs) |
|
| 959 |
-
| DART finance | 2,700+ companies | ~600 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/finance) |
|
| 960 |
-
| DART report | 2,700+ companies | ~320 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/report) |
|
| 961 |
-
| EDGAR | On-demand | — | SEC API (auto-fetched) |
|
| 962 |
-
|
| 963 |
-
### 3-Step Data Pipeline
|
| 964 |
-
|
| 965 |
-
```
|
| 966 |
-
dartlab.Company("005930")
|
| 967 |
-
│
|
| 968 |
-
├─ 1. Local cache ──── already have it? done (instant)
|
| 969 |
-
│
|
| 970 |
-
├─ 2. HuggingFace ──── auto-download (~seconds, no key needed)
|
| 971 |
-
│
|
| 972 |
-
└─ 3. DART API ──────── collect with your API key (needs key)
|
| 973 |
-
```
|
| 974 |
-
|
| 975 |
-
If a company is not in HuggingFace, dartlab collects data directly from DART — this requires an API key:
|
| 976 |
-
|
| 977 |
-
```bash
|
| 978 |
-
dartlab setup dart-key
|
| 979 |
-
```
|
| 980 |
-
|
| 981 |
-
### Freshness — Automatic Update Detection
|
| 982 |
-
|
| 983 |
-
DartLab uses a 3-layer freshness system to keep your local data current:
|
| 984 |
-
|
| 985 |
-
| Layer | Method | Cost |
|
| 986 |
-
|-------|--------|------|
|
| 987 |
-
| L1 | HTTP HEAD → ETag comparison with HuggingFace | ~0.5s, few hundred bytes |
|
| 988 |
-
| L2 | Local file age (90-day TTL fallback) | instant (local) |
|
| 989 |
-
| L3 | DART API → `rcept_no` diff (requires API key) | 1 API call, ~1s |
|
| 990 |
-
|
| 991 |
-
When you open a `Company`, dartlab checks if newer data exists. If a new disclosure was filed:
|
| 992 |
-
|
| 993 |
-
```python
|
| 994 |
-
c = dartlab.Company("005930")
|
| 995 |
-
# [dartlab] ⚠ 005930 — 새 공시 2건 발견 (사업보고서 (2024.12))
|
| 996 |
-
# • 증분 수집: dartlab collect --incremental 005930
|
| 997 |
-
# • 또는 Python: c.update()
|
| 998 |
-
|
| 999 |
-
c.update() # incremental collect — only missing filings
|
| 1000 |
-
```
|
| 1001 |
-
|
| 1002 |
-
```bash
|
| 1003 |
-
# CLI freshness check
|
| 1004 |
-
dartlab collect --check 005930 # single company
|
| 1005 |
-
dartlab collect --check # scan all local companies (7 days)
|
| 1006 |
-
|
| 1007 |
-
# incremental collect — only missing filings
|
| 1008 |
-
dartlab collect --incremental 005930 # single company
|
| 1009 |
-
dartlab collect --incremental # all local companies with new filings
|
| 1010 |
-
```
|
| 1011 |
-
|
| 1012 |
-
### Batch Collection (DART API)
|
| 1013 |
-
|
| 1014 |
-
```bash
|
| 1015 |
-
dartlab collect --batch # all listed, missing only
|
| 1016 |
-
dartlab collect --batch -c finance 005930 # specific category + company
|
| 1017 |
-
dartlab collect --batch --mode all # re-collect everything
|
| 1018 |
-
```
|
| 1019 |
-
|
| 1020 |
-
## Try It Now
|
| 1021 |
-
|
| 1022 |
-
### Live Demo (No Install)
|
| 1023 |
-
|
| 1024 |
-
Try DartLab instantly — no Python, no terminal, no setup:
|
| 1025 |
-
|
| 1026 |
-
**[→ Open Live Demo](https://huggingface.co/spaces/eddmpython/dartlab)** — enter a stock code, see financials immediately
|
| 1027 |
-
|
| 1028 |
-
Or open a [Colab notebook](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) in your browser.
|
| 1029 |
-
|
| 1030 |
-
### Marimo Notebooks
|
| 1031 |
-
|
| 1032 |
-
> Data is automatically downloaded on first use. No setup required unless collecting new companies directly from DART.
|
| 1033 |
-
|
| 1034 |
-
```bash
|
| 1035 |
-
uv add dartlab marimo
|
| 1036 |
-
marimo edit notebooks/marimo/dartCompany.py # Korean company (DART)
|
| 1037 |
-
marimo edit notebooks/marimo/edgarCompany.py # US company (EDGAR)
|
| 1038 |
-
marimo edit notebooks/marimo/aiAnalysis.py # AI analysis examples
|
| 1039 |
-
```
|
| 1040 |
-
|
| 1041 |
-
### Colab Notebooks
|
| 1042 |
-
|
| 1043 |
-
**Showcase** (English — global audience):
|
| 1044 |
-
|
| 1045 |
-
| Notebook | Topic |
|
| 1046 |
-
|---|---|
|
| 1047 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) | **Quick Start** — analyze any company in 3 lines |
|
| 1048 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/02_financial_analysis.ipynb) | **Financial Analysis** — statements, time series, ratios |
|
| 1049 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/03_kr_us_compare.ipynb) | **Korea vs US** — Samsung vs Apple side-by-side |
|
| 1050 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/04_risk_diff.ipynb) | **Risk Diff** — track disclosure changes (Bloomberg can't) |
|
| 1051 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/05_sector_screening.ipynb) | **Sector Screening** — 8 presets, sector benchmarks |
|
| 1052 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/06_insight_anomaly.ipynb) | **Insight & Anomaly** — 10-area grading, 6 anomaly rules |
|
| 1053 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/07_network_governance.ipynb) | **Network & Governance** — corporate relationship graph |
|
| 1054 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/08_signal_trend.ipynb) | **Signal Trends** — 48-keyword disclosure monitoring |
|
| 1055 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/09_ai_analysis.ipynb) | **AI Analysis** — `dartlab.ask()` with 9 LLM providers |
|
| 1056 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/10_disclosure_deep_dive.ipynb) | **Disclosure Deep Dive** — sections architecture |
|
| 1057 |
-
|
| 1058 |
-
<details>
|
| 1059 |
-
<summary>한국어 Tutorials</summary>
|
| 1060 |
-
|
| 1061 |
-
| Notebook | Topic |
|
| 1062 |
-
|---|---|
|
| 1063 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/getting-started/quickstart.ipynb) | **빠른 시작** — sections, show, trace, diff |
|
| 1064 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/02_financial_statements.ipynb) | **재무제표** — BS, IS, CF |
|
| 1065 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/04_ratios.ipynb) | **재무비율** — 47개 비율 |
|
| 1066 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/06_disclosure.ipynb) | **공시 텍스트** — sections 파싱 |
|
| 1067 |
-
| [](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/09_edgar.ipynb) | **EDGAR** — 미국 SEC |
|
| 1068 |
-
|
| 1069 |
-
</details>
|
| 1070 |
-
|
| 1071 |
-
## Documentation
|
| 1072 |
-
|
| 1073 |
-
- Docs: https://eddmpython.github.io/dartlab/
|
| 1074 |
-
- Sections guide: https://eddmpython.github.io/dartlab/docs/getting-started/sections
|
| 1075 |
-
- Quick start: https://eddmpython.github.io/dartlab/docs/getting-started/quickstart
|
| 1076 |
-
- API overview: https://eddmpython.github.io/dartlab/docs/api/overview
|
| 1077 |
-
- Beginner guide (Korean): https://eddmpython.github.io/dartlab/blog/dartlab-easy-start/
|
| 1078 |
-
|
| 1079 |
-
### Blog
|
| 1080 |
-
|
| 1081 |
-
The [DartLab Blog](https://eddmpython.github.io/dartlab/blog/) covers practical disclosure analysis — how to read reports, interpret patterns, and spot risk signals. 120+ articles across three categories:
|
| 1082 |
-
|
| 1083 |
-
- **Disclosure Systems** — structure and mechanics of DART/EDGAR filings
|
| 1084 |
-
- **Report Reading** — practical guide to audit reports, preliminary earnings, restatements
|
| 1085 |
-
- **Financial Interpretation** — financial statements, ratios, and disclosure signals
|
| 1086 |
-
|
| 1087 |
-
## Stability
|
| 1088 |
-
|
| 1089 |
-
| Tier | Scope |
|
| 1090 |
-
|------|-------|
|
| 1091 |
-
| **Stable** | DART Company (sections, show, trace, diff, BS/IS/CF, CIS, index, filings, profile), EDGAR Company core, valuation, forecast, simulation |
|
| 1092 |
-
| **Beta** | EDGAR power-user (SCE, notes, freq, coverage), insights, distress, ratios, timeseries, network, governance, workforce, capital, debt, chart/table/text tools, ask/chat, OpenDart, OpenEdgar, Server API, MCP, CLI subcommands |
|
| 1093 |
-
| **Experimental** | AI tool calling, export |
|
| 1094 |
-
| **Alpha** | Desktop App (Windows .exe) — functional but incomplete, Sections Viewer — not yet fully structured |
|
| 1095 |
-
|
| 1096 |
-
See [docs/stability.md](docs/stability.md).
|
| 1097 |
-
|
| 1098 |
-
## Contributing
|
| 1099 |
-
|
| 1100 |
-
The project prefers **experiments before engine changes**. If you want to propose a parser or mapping change, validate it in `experiments/` first and bring the verified result back into the engine.
|
| 1101 |
-
|
| 1102 |
-
- **Experiment folder**: `experiments/XXX_camelCaseName/` — each file must be independently runnable with actual results in its docstring
|
| 1103 |
-
- **Data contributions** (e.g. `accountMappings.json`, `sectionMappings.json`): only accepted when backed by experiment evidence — no manual bulk edits
|
| 1104 |
-
- Issues and PRs in Korean or English are both welcome
|
| 1105 |
-
|
| 1106 |
-
## License
|
| 1107 |
-
|
| 1108 |
-
MIT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pyproject.toml
CHANGED
|
@@ -59,6 +59,7 @@ dependencies = [
|
|
| 59 |
"orjson>=3.10.0,<4",
|
| 60 |
"polars>=1.0.0,<2",
|
| 61 |
"requests>=2.32.5,<3",
|
|
|
|
| 62 |
"rich>=14.3.3,<15",
|
| 63 |
"plotly>=5.0.0,<6",
|
| 64 |
"mcp[cli]>=1.0",
|
|
|
|
| 59 |
"orjson>=3.10.0,<4",
|
| 60 |
"polars>=1.0.0,<2",
|
| 61 |
"requests>=2.32.5,<3",
|
| 62 |
+
"prompt-toolkit>=3.0,<4",
|
| 63 |
"rich>=14.3.3,<15",
|
| 64 |
"plotly>=5.0.0,<6",
|
| 65 |
"mcp[cli]>=1.0",
|
src/dartlab/ai/DEV.md
CHANGED
|
@@ -1,5 +1,77 @@
|
|
| 1 |
# AI Engine Development Guide
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
## Source Of Truth
|
| 4 |
|
| 5 |
- ๋ฐ์ดํฐ source-of-truth: `src/dartlab/core/registry.py`
|
|
|
|
| 1 |
# AI Engine Development Guide
|
| 2 |
|
| 3 |
+
## ์ค๊ณ ์ฌ์
|
| 4 |
+
|
| 5 |
+
### dartlab AI๋ ๋ฌด์์ธ๊ฐ
|
| 6 |
+
|
| 7 |
+
dartlab์ ํต์ฌ ์์ฐ์ ๋ฐ์ดํฐ ์์ง์ด๋ค. ์ ์๊ณต์ ์๋ณธ์ ์ ๊ทํํ์ฌ **์ ๊ธฐ๊ฐ ๋น๊ต๊ฐ๋ฅ + ๊ธฐ์
๊ฐ ๋น๊ต๊ฐ๋ฅ**ํ ๊ตฌ์กฐ๋ก ๋ง๋ ๊ฒ์ด dartlab์ ์กด์ฌ ์ด์ ๋ค. AI๋ ์ด ๋ฐ์ดํฐ ์์์ ๋์ํ๋ **์๋น์**์ด์ง, ๋ฐ์ดํฐ๋ฅผ ๋์ฒดํ์ง ์๋๋ค.
|
| 8 |
+
|
| 9 |
+
**LLM์ ํด์์์ด์ง ๋ถ์๊ฐ๊ฐ ์๋๋ค.**
|
| 10 |
+
- ๊ณ์ฐ์ ์์ง์ด ํ๋ค (ratios, timeseries, insights, valuation)
|
| 11 |
+
- ํ๋จ์ ์์ง์ด ํ๋ค (anomaly detection, scoring, red flags)
|
| 12 |
+
- LLM์ ์์ง ๊ฒฐ๊ณผ๋ฅผ ๋ฐ์์ **"์"๋ฅผ ์ค๋ช
ํ๊ณ , ์ธ๊ณผ ๊ด๊ณ๋ฅผ ์์ ํ๊ณ , ์ฌ์ฉ์ ์ง๋ฌธ์ ๋ตํ๋ค**
|
| 13 |
+
|
| 14 |
+
์ด๊ฒ์ด dexter์์ ๊ทผ๋ณธ์ ์ฐจ์ด๋ค:
|
| 15 |
+
- dexter: ๋ฐ์ดํฐ ์์. LLM์ด ์ธ๋ถ API๋ฅผ ํธ์ถํด์ ๋ฐ์ดํฐ๋ฅผ ์์งํ๊ณ ๋ถ์. LLM์ด ์ ๋ถ.
|
| 16 |
+
- dartlab: ๋ฐ์ดํฐ ์์ง์ด ์ ๋ถ. LLM์ ์ ๊ทํ๋ ๋ฐ์ดํฐ๋ฅผ ์ฝ๊ณ ํด์ํ๋ ๋ง์ง๋ง ๊ณ์ธต.
|
| 17 |
+
|
| 18 |
+
### 2-Tier ์ํคํ
์ฒ
|
| 19 |
+
|
| 20 |
+
- **Tier 1 (์์คํ
์ฃผ๋)**: ์ง๋ฌธ ๋ถ๋ฅ โ ์์ง ๊ณ์ฐ โ ๊ฒฐ๊ณผ๋ฅผ ์ปจํ
์คํธ๋ก ์กฐ๋ฆฝ โ LLM์ ํ ๋ฒ ์ ๋ฌ. ๋ชจ๋ provider์์ ๋์. tool calling ๋ถํ์.
|
| 21 |
+
- **Tier 2 (LLM ์ฃผ๋)**: Tier 1 ๊ฒฐ๊ณผ๋ฅผ ๋ณด๊ณ LLM์ด "๋ถ์กฑํ๋ค" ํ๋จ โ ๋๊ตฌ ํธ์ถ๋ก ์ถ๊ฐ ํ์. tool calling ๊ฐ๋ฅํ provider์์๋ง ๋์.
|
| 22 |
+
|
| 23 |
+
Tier 1์ด ์ถฉ๋ถํ๋ฉด LLM roundtrip์ 1ํ๋ค. ์ด๊ฒ์ด ์๋์ ํต์ฌ์ด๋ค.
|
| 24 |
+
|
| 25 |
+
### ์๋ ์์น
|
| 26 |
+
|
| 27 |
+
**LLM roundtrip์ ์ค์ด๋ ๊ฒ์ด ์๋๋ค.**
|
| 28 |
+
- ๋ ๋ง์ ๋ฐ์ดํฐ๋ฅผ ๋ฏธ๋ฆฌ ์กฐ๋ฆฝํด์ 1ํ์ ๋๋ด๋ ๊ฒ์ด ๋น ๋ฅด๋ค (Tier 1 ๊ฐํ)
|
| 29 |
+
- ๋๊ตฌ ํธ์ถ์ ๋ณ๋ ฌํํ๋ ๊ฒ๋ณด๋ค, ์ ์ด์ ํธ์ถ์ด ํ์ ์๊ฒ ๋ง๋๋ ๊ฒ์ด ๋น ๋ฅด๋ค
|
| 30 |
+
- changes(๊ณต์ ๋ณํ๋ถ 23%)๋ฅผ ์ปจํ
์คํธ์ ๋ฏธ๋ฆฌ ๋ฃ์ผ๋ฉด "๋ญ๊ฐ ๋ฐ๋์์ง?" ํ์ ํธ์ถ์ด ์ฌ๋ผ์ง๋ค
|
| 31 |
+
|
| 32 |
+
### dexter์์ ํก์ํ ๊ฒ
|
| 33 |
+
|
| 34 |
+
| ํจํด | dexter ์๋ณธ | dartlab ์ ์ฉ |
|
| 35 |
+
|------|------------|-------------|
|
| 36 |
+
| Scratchpad | ๋๊ตฌ ๊ฒฐ๊ณผ ๋์ /ํ ํฐ ๊ด๋ฆฌ | `runtime/scratchpad.py` โ ์ค๋ณต ํธ์ถ ๋ฐฉ์ง, ํ ํฐ ์์ฐ |
|
| 37 |
+
| SOUL.md | ๋ถ์ ์ฒ ํ ์ฃผ์
| `templates/analysisPhilosophy.py` โ Palepu-Healy + CFA ์ฌ๊ณ ํ๋ ์ |
|
| 38 |
+
| stripFieldsDeep | ๋๊ตฌ ๊ฒฐ๊ณผ ํ๋ ์ ๊ฑฐ | `context/pruning.py` โ XBRL ๋ฉํ๋ฐ์ดํฐ ์ฌ๊ท ์ ๊ฑฐ |
|
| 39 |
+
| SKILL.md | ์ํฌํ๋ก์ฐ ๊ฐ์ด๋ | `skills/catalog.py` โ 8๊ฐ ๋ถ์ ์คํฌ (๋๊ตฌ ๋น์์กด) |
|
| 40 |
+
| ์์จ ์์ด์ ํธ | ์ถฉ๋ถํ ๋๊น์ง ํ์ | `agentLoopAutonomous()` โ report_mode Tier 2 |
|
| 41 |
+
| ์ธ์
๋ฉ๋ชจ๋ฆฌ | SQLite + ์๊ฐ ๊ฐ์ | `memory/store.py` โ ๋ถ์ ๊ธฐ๋ก ์์ |
|
| 42 |
+
|
| 43 |
+
### ํก์ํ์ง ์์ ๊ฒ
|
| 44 |
+
|
| 45 |
+
- **๋ฐ์ดํฐ ์์ ๊ตฌ์กฐ**: dexter๋ ์ธ๋ถ API๋ก ๋ฐ์ดํฐ ์์ง. dartlab์ ์ด๋ฏธ ๋ฐ์ดํฐ ์์ง์ ์์ .
|
| 46 |
+
- **๋จ์ผ ๋ชจ๋ธ ์์กด**: dexter๋ ๋ชจ๋ ํ๋จ์ LLM์ ์์. dartlab์ ์์ง์ด ๊ณ์ฐ/ํ๋จํ๊ณ LLM์ ํด์๋ง.
|
| 47 |
+
- **meta-tool ํจํด**: ๋๊ตฌ ์์ ๋๊ตฌ๋ฅผ ๋ฃ๋ ๊ตฌ์กฐ. dartlab์ Super Tool 7๊ฐ๋ก ์ด๋ฏธ ํด๊ฒฐ.
|
| 48 |
+
|
| 49 |
+
### ์ฌ์ฉ์ ์์น
|
| 50 |
+
|
| 51 |
+
- **์ ๊ทผ์ฑ**: ์ข
๋ชฉ์ฝ๋ ํ๋๋ฉด ๋. `dartlab ask "005930" "์์
์ด์ต๋ฅ ์ถ์ธ๋?"` ๋๋ `dartlab chat`์ผ๋ก ์ธํฐ๋ํฐ๋ธ.
|
| 52 |
+
- **์ ๋ขฐ์ฑ**: ์ซ์๋ ์์ง์ด ๊ณ์ฐํ ์๋ณธ. LLM์ด ์ซ์๋ฅผ ๋ง๋ค์ด๋ด๋ฉด ๊ฒ์ฆ ๋ ์ด์ด๊ฐ ์ก๋๋ค.
|
| 53 |
+
- **ํฌ๋ช
์ฑ**: ์ด๋ค ๋ฐ์ดํฐ๋ฅผ ๋ดค๋์ง(includedEvidence), ์ด๋ค ๋๊ตฌ๋ฅผ ์ผ๋์ง(tool_call) ํญ์ ๋
ธ์ถ.
|
| 54 |
+
|
| 55 |
+
### ํ์ง ๊ฒ์ฆ ๊ธฐ์ค์ (2026-03-27)
|
| 56 |
+
|
| 57 |
+
ollama qwen3:4b ๊ธฐ์ค critical+high 35๊ฑด ๋ฐฐ์น ๊ฒฐ๊ณผ:
|
| 58 |
+
|
| 59 |
+
| ์งํ | ๊ฐ | ๋น๊ณ |
|
| 60 |
+
|------|-----|------|
|
| 61 |
+
| avgOverall | 7.33 | gemini fallback ์์ ํ ์ฌ์ธก์ (์์ ์ 5.98) |
|
| 62 |
+
| routeMatch | 1.00 | intent ๋ถ๋ฅ + ๋ผ์ฐํ
์๋ฒฝ |
|
| 63 |
+
| moduleUtilization | 0.75 | ์ผ๋ถ eval ์ผ์ด์ค ์ ํฉ์ฑ ๋ฌธ์ ํฌํจ |
|
| 64 |
+
| falseUnavailable | 0/35 | "๋ฐ์ดํฐ ์๋ค" ๊ฑฐ์ง ์๋ต ์์ |
|
| 65 |
+
|
| 66 |
+
production ๋ชจ๋ธ(openai/gemini) ์ธก์ ์ API ํค ํ๋ณด ํ ์งํ ์์ . factual accuracy๋ production ๋ชจ๋ธ์์๋ง ์ ์๋ฏธ.
|
| 67 |
+
|
| 68 |
+
์ฃผ์ failure taxonomy:
|
| 69 |
+
- **runtime_error**: provider ์ค์ ์ ํฉ์ฑ (ํด๊ฒฐ๋จ)
|
| 70 |
+
- **retrieval_failure**: eval ์ผ์ด์ค expectedModules์ ์ค์ ์ปจํ
์คํธ ๋น๋ ๋งคํ ๊ฐ๊ทน
|
| 71 |
+
- **generation_failure**: ์ํ ๋ชจ๋ธ ํ๊ณ (production ๋ชจ๋ธ์์ ์ฌ์ธก์ ํ์)
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
## Source Of Truth
|
| 76 |
|
| 77 |
- ๋ฐ์ดํฐ source-of-truth: `src/dartlab/core/registry.py`
|
src/dartlab/ai/context/builder.py
CHANGED
|
@@ -231,8 +231,7 @@ _CANDIDATE_ALIASES = {
|
|
| 231 |
}
|
| 232 |
_MARGIN_DRIVER_MARGIN_HINTS = frozenset({"์์
์ด์ต๋ฅ ", "๋ง์ง", "์ด์ต๋ฅ ", "margin"})
|
| 233 |
_MARGIN_DRIVER_COST_HINTS = frozenset({"๋น์ฉ ๊ตฌ์กฐ", "์๊ฐ ๊ตฌ์กฐ", "๋น์ฉ", "์๊ฐ", "ํ๊ด๋น", "๋งค์ถ์๊ฐ"})
|
| 234 |
-
|
| 235 |
-
_RECENT_DISCLOSURE_BUSINESS_HINTS = frozenset({"์ฌ์
๋ณํ", "์ฌ์
๋ณํ", "์ฌ์
๊ตฌ์กฐ", "์ฌ์
๊ตฌ์กฐ"})
|
| 236 |
_PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
|
| 237 |
|
| 238 |
|
|
@@ -372,13 +371,13 @@ def _has_margin_driver_pattern(question: str) -> bool:
|
|
| 372 |
return (
|
| 373 |
_question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
|
| 374 |
and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
|
| 375 |
-
and _question_has_any(question,
|
| 376 |
)
|
| 377 |
|
| 378 |
|
| 379 |
def _has_recent_disclosure_business_pattern(question: str) -> bool:
|
| 380 |
lowered = question.lower()
|
| 381 |
-
return "์ต๊ทผ ๊ณต์" in lowered and _question_has_any(question,
|
| 382 |
|
| 383 |
|
| 384 |
def _resolve_direct_hint_modules(question: str) -> list[str]:
|
|
@@ -957,6 +956,61 @@ def _build_sections_context(
|
|
| 957 |
return result
|
| 958 |
|
| 959 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 960 |
def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
|
| 961 |
if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
|
| 962 |
return None
|
|
@@ -1203,6 +1257,14 @@ def _build_compact_context_modules_inner(
|
|
| 1203 |
if included_name not in included:
|
| 1204 |
included.append(included_name)
|
| 1205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1206 |
direct_sections = _build_direct_module_context(
|
| 1207 |
company,
|
| 1208 |
candidate_plan.get("direct", []),
|
|
|
|
| 231 |
}
|
| 232 |
_MARGIN_DRIVER_MARGIN_HINTS = frozenset({"์์
์ด์ต๋ฅ ", "๋ง์ง", "์ด์ต๋ฅ ", "margin"})
|
| 233 |
_MARGIN_DRIVER_COST_HINTS = frozenset({"๋น์ฉ ๊ตฌ์กฐ", "์๊ฐ ๊ตฌ์กฐ", "๋น์ฉ", "์๊ฐ", "ํ๊ด๋น", "๋งค์ถ์๊ฐ"})
|
| 234 |
+
_BUSINESS_CHANGE_HINTS = frozenset({"์ฌ์
๋ณํ", "์ฌ์
๋ณํ", "์ฌ์
๊ตฌ์กฐ", "์ฌ์
๊ตฌ์กฐ"})
|
|
|
|
| 235 |
_PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
|
| 236 |
|
| 237 |
|
|
|
|
| 371 |
return (
|
| 372 |
_question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
|
| 373 |
and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
|
| 374 |
+
and _question_has_any(question, _BUSINESS_CHANGE_HINTS)
|
| 375 |
)
|
| 376 |
|
| 377 |
|
| 378 |
def _has_recent_disclosure_business_pattern(question: str) -> bool:
|
| 379 |
lowered = question.lower()
|
| 380 |
+
return "์ต๊ทผ ๊ณต์" in lowered and _question_has_any(question, _BUSINESS_CHANGE_HINTS)
|
| 381 |
|
| 382 |
|
| 383 |
def _resolve_direct_hint_modules(question: str) -> list[str]:
|
|
|
|
| 956 |
return result
|
| 957 |
|
| 958 |
|
| 959 |
+
def _build_changes_context(company: Any, *, compact: bool = True) -> str:
|
| 960 |
+
"""sections ๋ณํ ์์ฝ์ LLM ์ปจํ
์คํธ์ฉ ๋งํฌ๋ค์ด์ผ๋ก ๋ณํ.
|
| 961 |
+
|
| 962 |
+
์ ์ฒด sections(97MB) ๋์ ๋ณํ๋ถ(23%)๋ง ์์ฝํ์ฌ ์ ๊ณต.
|
| 963 |
+
LLM์ด ์ถ๊ฐ ๋๊ตฌ ํธ์ถ ์์ด "๋ฌด์์ด ๋ฐ๋์๋์ง" ์ฆ์ ํ์
๊ฐ๋ฅ.
|
| 964 |
+
"""
|
| 965 |
+
docs = getattr(company, "docs", None)
|
| 966 |
+
sections = getattr(docs, "sections", None)
|
| 967 |
+
if sections is None or not hasattr(sections, "changeSummary"):
|
| 968 |
+
return ""
|
| 969 |
+
|
| 970 |
+
try:
|
| 971 |
+
summary = sections.changeSummary(topN=8 if compact else 15)
|
| 972 |
+
except (AttributeError, TypeError, ValueError, pl.exceptions.PolarsError):
|
| 973 |
+
return ""
|
| 974 |
+
|
| 975 |
+
if summary is None or summary.is_empty():
|
| 976 |
+
return ""
|
| 977 |
+
|
| 978 |
+
lines = ["\n## ๊ณต์ ๋ณํ ์์ฝ"]
|
| 979 |
+
lines.append("| topic | ๋ณํ์ ํ | ๊ฑด์ | ํ๊ท ํฌ๊ธฐ๋ณํ |")
|
| 980 |
+
lines.append("|-------|---------|------|------------|")
|
| 981 |
+
for row in summary.iter_rows(named=True):
|
| 982 |
+
topic = row.get("topic", "")
|
| 983 |
+
changeType = row.get("changeType", "")
|
| 984 |
+
count = row.get("count", 0)
|
| 985 |
+
avgDelta = row.get("avgDelta", 0)
|
| 986 |
+
sign = "+" if avgDelta and avgDelta > 0 else ""
|
| 987 |
+
lines.append(f"| {topic} | {changeType} | {count} | {sign}{avgDelta} |")
|
| 988 |
+
|
| 989 |
+
# ์ต๊ทผ ๊ธฐ๊ฐ ์ฃผ์ ๋ณํ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
|
| 990 |
+
try:
|
| 991 |
+
changes = sections.changes()
|
| 992 |
+
except (AttributeError, TypeError, ValueError, pl.exceptions.PolarsError):
|
| 993 |
+
changes = None
|
| 994 |
+
|
| 995 |
+
if changes is not None and not changes.is_empty():
|
| 996 |
+
# ๊ฐ์ฅ ์ต๊ทผ ๊ธฐ๊ฐ ์ ํ์์ structural/appeared ๋ณํ๋ง ๋ฐ์ท
|
| 997 |
+
latestPeriod = changes.get_column("toPeriod").max()
|
| 998 |
+
recent = changes.filter(
|
| 999 |
+
(pl.col("toPeriod") == latestPeriod) & pl.col("changeType").is_in(["structural", "appeared"])
|
| 1000 |
+
)
|
| 1001 |
+
if not recent.is_empty():
|
| 1002 |
+
lines.append(f"\n### ์ต๊ทผ ์ฃผ์ ๋ณํ ({latestPeriod})")
|
| 1003 |
+
for row in recent.head(5 if compact else 10).iter_rows(named=True):
|
| 1004 |
+
topic = row.get("topic", "")
|
| 1005 |
+
ct = row.get("changeType", "")
|
| 1006 |
+
preview = row.get("preview", "")
|
| 1007 |
+
if preview:
|
| 1008 |
+
preview = preview[:120] + "..." if len(preview) > 120 else preview
|
| 1009 |
+
lines.append(f"- **{topic}** [{ct}]: {preview}")
|
| 1010 |
+
|
| 1011 |
+
return "\n".join(lines)
|
| 1012 |
+
|
| 1013 |
+
|
| 1014 |
def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
|
| 1015 |
if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
|
| 1016 |
return None
|
|
|
|
| 1257 |
if included_name not in included:
|
| 1258 |
included.append(included_name)
|
| 1259 |
|
| 1260 |
+
# ๋ณํ ์ปจํ
์คํธ โ sections ๋ณํ๋ถ๋ง LLM์ ์ ๋ฌ (roundtrip ๊ฐ์)
|
| 1261 |
+
if route in {"sections", "hybrid"}:
|
| 1262 |
+
changes_context = _build_changes_context(company, compact=compact)
|
| 1263 |
+
if changes_context:
|
| 1264 |
+
modules_dict["_changes"] = changes_context
|
| 1265 |
+
if "_changes" not in included:
|
| 1266 |
+
included.append("_changes")
|
| 1267 |
+
|
| 1268 |
direct_sections = _build_direct_module_context(
|
| 1269 |
company,
|
| 1270 |
candidate_plan.get("direct", []),
|
src/dartlab/ai/context/pruning.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋๊ตฌ ๊ฒฐ๊ณผ ํ๋ pruning โ LLM์ ๋ถํ์ํ ์ปฌ๋ผ/ํ๋ ์ฌ๊ท ์ ๊ฑฐ.
|
| 2 |
+
|
| 3 |
+
dexter์ stripFieldsDeep ํจํด์ Python์ ์ ์ฉ.
|
| 4 |
+
ํ ํฐ ์ ์ฝ + ๋ถ์ ๊ด๋ จ์ฑ ํฅ์.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
# LLM ๋ถ์์ ๋ถํ์ํ ํ๋ โ ์ฌ๊ท์ ์ผ๋ก ์ ๊ฑฐ
|
| 13 |
+
_STRIP_FIELDS: frozenset[str] = frozenset(
|
| 14 |
+
{
|
| 15 |
+
# XBRL ๋ฉํ๋ฐ์ดํฐ
|
| 16 |
+
"concept_id",
|
| 17 |
+
"xbrl_context_id",
|
| 18 |
+
"instant",
|
| 19 |
+
"member",
|
| 20 |
+
"dimension",
|
| 21 |
+
"label_ko_raw",
|
| 22 |
+
# ๊ณต์ ๋ฉํ๋ฐ์ดํฐ
|
| 23 |
+
"acceptance_number",
|
| 24 |
+
"rcept_no",
|
| 25 |
+
"filing_date",
|
| 26 |
+
"report_code",
|
| 27 |
+
"reprt_code",
|
| 28 |
+
"corp_cls",
|
| 29 |
+
"corp_code",
|
| 30 |
+
# ๊ธฐ์ ์ ์๋ณ์
|
| 31 |
+
"sj_div",
|
| 32 |
+
"ord",
|
| 33 |
+
"data_rank",
|
| 34 |
+
"source_file",
|
| 35 |
+
"source_path",
|
| 36 |
+
"sourceBlockOrder",
|
| 37 |
+
# ์ค๋ณต/๋ด๋ถ์ฉ
|
| 38 |
+
"account_id_raw",
|
| 39 |
+
"account_nm_raw",
|
| 40 |
+
"currency",
|
| 41 |
+
}
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# ๋ชจ๋๋ณ ์ถ๊ฐ ์ ๊ฑฐ ํ๋
|
| 45 |
+
_MODULE_STRIP: dict[str, frozenset[str]] = {
|
| 46 |
+
"finance": frozenset({"bsns_year", "sj_nm", "stock_code", "fs_div", "fs_nm"}),
|
| 47 |
+
"explore": frozenset({"blockHash", "rawHtml", "charCount"}),
|
| 48 |
+
"report": frozenset({"rcept_no", "corp_code", "corp_cls"}),
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def pruneToolResult(toolName: str, result: str, *, maxChars: int = 8000) -> str:
|
| 53 |
+
"""๋๊ตฌ ๊ฒฐ๊ณผ ๋ฌธ์์ด์์ ๋ถํ์ ํ๋๋ฅผ ์ ๊ฑฐ."""
|
| 54 |
+
if not result or len(result) < 100:
|
| 55 |
+
return result
|
| 56 |
+
|
| 57 |
+
# JSON ํ์ฑ ์๋
|
| 58 |
+
try:
|
| 59 |
+
data = json.loads(result)
|
| 60 |
+
except (json.JSONDecodeError, ValueError):
|
| 61 |
+
# JSON์ด ์๋๋ฉด ๊ทธ๋๋ก ๋ฐํ (๋งํฌ๋ค์ด ํ
์ด๋ธ ๋ฑ)
|
| 62 |
+
return result[:maxChars] if len(result) > maxChars else result
|
| 63 |
+
|
| 64 |
+
# ๋ชจ๋๋ณ ์ถ๊ฐ ํ๋ ๊ฒฐ์
|
| 65 |
+
category = _resolveCategory(toolName)
|
| 66 |
+
extra = _MODULE_STRIP.get(category, frozenset())
|
| 67 |
+
stripFields = _STRIP_FIELDS | extra
|
| 68 |
+
|
| 69 |
+
pruned = _pruneValue(data, stripFields, depth=0)
|
| 70 |
+
text = json.dumps(pruned, ensure_ascii=False, indent=2, default=str)
|
| 71 |
+
if len(text) > maxChars:
|
| 72 |
+
return text[:maxChars] + "\n... (pruned+truncated)"
|
| 73 |
+
return text
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _pruneValue(value: Any, stripFields: frozenset[str], depth: int) -> Any:
|
| 77 |
+
"""์ฌ๊ท์ ํ๋ ์ ๊ฑฐ."""
|
| 78 |
+
if depth > 8:
|
| 79 |
+
return value
|
| 80 |
+
if isinstance(value, dict):
|
| 81 |
+
return {k: _pruneValue(v, stripFields, depth + 1) for k, v in value.items() if k not in stripFields}
|
| 82 |
+
if isinstance(value, list):
|
| 83 |
+
return [_pruneValue(item, stripFields, depth + 1) for item in value]
|
| 84 |
+
return value
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _resolveCategory(toolName: str) -> str:
|
| 88 |
+
"""๋๊ตฌ ์ด๋ฆ์์ ์นดํ
๊ณ ๋ฆฌ ์ถ์ถ."""
|
| 89 |
+
if toolName in ("finance", "get_data", "compute_ratios"):
|
| 90 |
+
return "finance"
|
| 91 |
+
if toolName in ("explore", "show", "search_data"):
|
| 92 |
+
return "explore"
|
| 93 |
+
if toolName in ("report", "get_report"):
|
| 94 |
+
return "report"
|
| 95 |
+
return ""
|
src/dartlab/ai/conversation/prompts.py
CHANGED
|
@@ -36,6 +36,14 @@ from .templates.analysis_rules import (
|
|
| 36 |
from .templates.analysis_rules import (
|
| 37 |
TOPIC_PROMPTS as _TOPIC_PROMPTS,
|
| 38 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
|
| 40 |
from .templates.self_critique import (
|
| 41 |
SELF_CRITIQUE_PROMPT,
|
|
@@ -43,8 +51,6 @@ from .templates.self_critique import (
|
|
| 43 |
from .templates.self_critique import (
|
| 44 |
SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
|
| 45 |
)
|
| 46 |
-
|
| 47 |
-
# โโ ํ
ํ๋ฆฟ ๋ฐ์ดํฐ ์ํฌํธ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 48 |
from .templates.system_base import (
|
| 49 |
EDGAR_SUPPLEMENT_EN,
|
| 50 |
EDGAR_SUPPLEMENT_KR,
|
|
@@ -63,6 +69,19 @@ _PLUGIN_SYSTEM_PROMPT = """
|
|
| 63 |
- ๋ถ์ ์ค ํ๋ฌ๊ทธ์ธ ์ถ์ฒ ํํธ๊ฐ ์ ๊ณต๋๋ฉด, ๋ต๋ณ ๋์ ์์ฐ์ค๋ฝ๊ฒ ์๋ดํ์ธ์.
|
| 64 |
"""
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 67 |
# ์ง๋ฌธ ๋ถ๋ฅ
|
| 68 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
@@ -235,7 +254,7 @@ def build_system_prompt_parts(
|
|
| 235 |
|
| 236 |
if compact:
|
| 237 |
base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
|
| 238 |
-
static_parts: list[str] = []
|
| 239 |
dynamic_parts: list[str] = []
|
| 240 |
|
| 241 |
benchmark_key = _match_sector(sector) if sector else None
|
|
@@ -259,7 +278,11 @@ def build_system_prompt_parts(
|
|
| 259 |
if qt in _FEW_SHOT_COMPACT:
|
| 260 |
static_parts.append(_FEW_SHOT_COMPACT[qt])
|
| 261 |
|
| 262 |
-
# ๋์ : report_mode + ํ๋ฌ๊ทธ์ธ
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
if report_mode:
|
| 264 |
dynamic_parts.append(_REPORT_PROMPT_COMPACT)
|
| 265 |
|
|
@@ -284,10 +307,10 @@ def build_system_prompt_parts(
|
|
| 284 |
base = SYSTEM_PROMPT_EN
|
| 285 |
if not allow_tools:
|
| 286 |
base = _strip_tool_guidance(base)
|
| 287 |
-
static_parts = []
|
| 288 |
dynamic_parts = []
|
| 289 |
|
| 290 |
-
# ์ ์ : ๋ฒค์น๋งํฌ + ํ ํฝ + ๊ต์ฐจ๊ฒ์ฆ + Few-shot
|
| 291 |
benchmark_key = _match_sector(sector) if sector else None
|
| 292 |
if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
|
| 293 |
static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
|
|
@@ -314,7 +337,11 @@ def build_system_prompt_parts(
|
|
| 314 |
edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
|
| 315 |
static_parts.append(edgar_supp)
|
| 316 |
|
| 317 |
-
# ๋์ : report_mode + ํ๋ฌ๊ทธ์ธ
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
if report_mode:
|
| 319 |
dynamic_parts.append(_REPORT_PROMPT)
|
| 320 |
|
|
|
|
| 36 |
from .templates.analysis_rules import (
|
| 37 |
TOPIC_PROMPTS as _TOPIC_PROMPTS,
|
| 38 |
)
|
| 39 |
+
|
| 40 |
+
# โโ ํ
ํ๋ฆฟ ๋ฐ์ดํฐ ์ํฌํธ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 41 |
+
from .templates.analysisPhilosophy import (
|
| 42 |
+
ANALYSIS_PHILOSOPHY_COMPACT as _PHILOSOPHY_COMPACT,
|
| 43 |
+
)
|
| 44 |
+
from .templates.analysisPhilosophy import (
|
| 45 |
+
ANALYSIS_PHILOSOPHY_KR as _PHILOSOPHY_KR,
|
| 46 |
+
)
|
| 47 |
from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
|
| 48 |
from .templates.self_critique import (
|
| 49 |
SELF_CRITIQUE_PROMPT,
|
|
|
|
| 51 |
from .templates.self_critique import (
|
| 52 |
SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
|
| 53 |
)
|
|
|
|
|
|
|
| 54 |
from .templates.system_base import (
|
| 55 |
EDGAR_SUPPLEMENT_EN,
|
| 56 |
EDGAR_SUPPLEMENT_KR,
|
|
|
|
| 69 |
- ๋ถ์ ์ค ํ๋ฌ๊ทธ์ธ ์ถ์ฒ ํํธ๊ฐ ์ ๊ณต๋๋ฉด, ๋ต๋ณ ๋์ ์์ฐ์ค๋ฝ๊ฒ ์๋ดํ์ธ์.
|
| 70 |
"""
|
| 71 |
|
| 72 |
+
# โโ ์คํฌ ๋งค์นญ ํฌํผ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _matchSkillSafe(questionType: str | None, qTypes: list[str]) -> Any:
|
| 76 |
+
"""์คํฌ ๋งค์นญ (import ์คํจ ์ None)."""
|
| 77 |
+
try:
|
| 78 |
+
from dartlab.ai.skills.registry import matchSkill
|
| 79 |
+
|
| 80 |
+
return matchSkill("", questionType=questionType or (qTypes[0] if qTypes else None))
|
| 81 |
+
except Exception:
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 86 |
# ์ง๋ฌธ ๋ถ๋ฅ
|
| 87 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 254 |
|
| 255 |
if compact:
|
| 256 |
base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
|
| 257 |
+
static_parts: list[str] = [_PHILOSOPHY_COMPACT]
|
| 258 |
dynamic_parts: list[str] = []
|
| 259 |
|
| 260 |
benchmark_key = _match_sector(sector) if sector else None
|
|
|
|
| 278 |
if qt in _FEW_SHOT_COMPACT:
|
| 279 |
static_parts.append(_FEW_SHOT_COMPACT[qt])
|
| 280 |
|
| 281 |
+
# ๋์ : skill + report_mode + ํ๋ฌ๊ทธ์ธ
|
| 282 |
+
_skill = _matchSkillSafe(question_type, q_types)
|
| 283 |
+
if _skill:
|
| 284 |
+
dynamic_parts.append(_skill.toPrompt())
|
| 285 |
+
|
| 286 |
if report_mode:
|
| 287 |
dynamic_parts.append(_REPORT_PROMPT_COMPACT)
|
| 288 |
|
|
|
|
| 307 |
base = SYSTEM_PROMPT_EN
|
| 308 |
if not allow_tools:
|
| 309 |
base = _strip_tool_guidance(base)
|
| 310 |
+
static_parts = [_PHILOSOPHY_KR]
|
| 311 |
dynamic_parts = []
|
| 312 |
|
| 313 |
+
# ์ ์ : ์ฒ ํ + ๋ฒค์น๋งํฌ + ํ ํฝ + ๊ต์ฐจ๊ฒ์ฆ + Few-shot
|
| 314 |
benchmark_key = _match_sector(sector) if sector else None
|
| 315 |
if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
|
| 316 |
static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
|
|
|
|
| 337 |
edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
|
| 338 |
static_parts.append(edgar_supp)
|
| 339 |
|
| 340 |
+
# ๋์ : skill + report_mode + ํ๋ฌ๊ทธ์ธ
|
| 341 |
+
_skill = _matchSkillSafe(question_type, q_types)
|
| 342 |
+
if _skill:
|
| 343 |
+
dynamic_parts.append(_skill.toPrompt())
|
| 344 |
+
|
| 345 |
if report_mode:
|
| 346 |
dynamic_parts.append(_REPORT_PROMPT)
|
| 347 |
|
src/dartlab/ai/conversation/templates/analysisPhilosophy.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋ถ์ ์ฒ ํ โ Palepu-Healy + CFA ํ๋ ์์ํฌ ๊ธฐ๋ฐ ์ฌ๊ณ ํ๋ ์.
|
| 2 |
+
|
| 3 |
+
๊ธฐ์กด system_base.py์ 7๋จ๊ณ ํ๋ ์์ํฌ๋ "์ด๋ป๊ฒ ๋ถ์ํ๋ผ"(์ ์ฐจ).
|
| 4 |
+
์ด ์ฒ ํ์ "์ด๋ค ๊ด์ ์ผ๋ก ๋ณด๋ผ"(์ฌ๊ณ ํ๋ ์)๋ฅผ ์ฃผ์
ํ๋ค.
|
| 5 |
+
dexter์ SOUL.md ํจํด์ dartlab์ ์ ์ฉ.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
ANALYSIS_PHILOSOPHY_KR = """\
|
| 11 |
+
## ๋ถ์ ์ฒ ํ
|
| 12 |
+
|
| 13 |
+
### ์์น 1: ์ซ์ ๋ค์ ์ด์ผ๊ธฐ๋ฅผ ์ฝ์ด๋ผ
|
| 14 |
+
์ฌ๋ฌด์ ํ๋ ๊ฒฝ์ ์์ฌ๊ฒฐ์ ์ ๊ฒฐ๊ณผ๋ฌผ์ด๋ค. ์์น ๋ณํ๋ฅผ ๋ณด๋ฉด "์?"๋ฅผ ๋ฐ๋์ ์ถ์ ํ๋ผ.
|
| 15 |
+
๋งค์ถ์ด ์ฌ๋๋ค๋ฉด โ ๋ฌผ๋์ธ๊ฐ, ๋จ๊ฐ์ธ๊ฐ, ๋ฏน์ค ๋ณํ์ธ๊ฐ?
|
| 16 |
+
์ด์ต๋ฅ ์ด ๋จ์ด์ก๋ค๋ฉด โ ์๊ฐ์ธ๊ฐ, ํ๊ด๋น์ธ๊ฐ, ์ผํ์ฑ์ธ๊ฐ?
|
| 17 |
+
|
| 18 |
+
### ์์น 2: ์ด์ต์ ์ง์ ์์ฌํ๋ผ
|
| 19 |
+
ํ๊ณ ์ด์ต๊ณผ ํ๊ธ ์ด์ต์ ๋ค๋ฅด๋ค.
|
| 20 |
+
- ์์
CF๊ฐ ์์ด์ต์ ์ง์์ ์ผ๋ก ํํํ๋ฉด ๋ฐ์์ฃผ์ ์ด์ต์ ์์ฌํ๋ผ
|
| 21 |
+
- ์ด์ ์๋ณธ ๋ณํ, ๊ฐ๊ฐ์๊ฐ ๋๋น CAPEX, ์๋ณธํ ์ ์ฑ
์ ํ์ธํ๋ผ
|
| 22 |
+
- Accrual Ratio๊ฐ ๋์ผ๋ฉด ์ด์ต์ ์ง์๊ฐ๋ฅ์ฑ์ ๋ฌผ์ํ๋ฅผ ๋ถ์ฌ๋ผ
|
| 23 |
+
|
| 24 |
+
### ์์น 3: ๊ตฌ์กฐ๋ฅผ ๋ถํดํ๋ผ
|
| 25 |
+
- ROE๋ DuPont์ผ๋ก ๋ถํด: ์์ต์ฑ ร ํจ์จ์ฑ ร ๋ ๋ฒ๋ฆฌ์ง
|
| 26 |
+
- ๋งค์ถ์ ๋ถ๋ฌธ๋ณ, ์ง์ญ๋ณ, ์ ํ๋ณ๋ก ๋ถํด
|
| 27 |
+
- ๋น์ฉ์ ์ฑ๊ฒฉ๋ณ(์์ฌ๋ฃ/์ธ๊ฑด๋น/๊ฐ๊ฐ)๋ก ๋ถํด
|
| 28 |
+
- ํฉ์ฐ ์ซ์๋ง ๋ณด๋ฉด ๊ตฌ์กฐ ๋ณํ๋ฅผ ๋์น๋ค
|
| 29 |
+
|
| 30 |
+
### ์์น 4: ๊ต์ฐจ๊ฒ์ฆํ๋ผ
|
| 31 |
+
- ๊ณต์ ์์ ๊ณผ ์ฌ๋ฌด ์์น๊ฐ ์ผ์นํ๋์ง ํ์ธ
|
| 32 |
+
- ๊ฒฝ์์ง ์ฝ๋ฉํธ์ ์ค์ ์๋ณธ ๋ฐฐ๋ถ์ด ๋ถํฉํ๋์ง ํ์ธ
|
| 33 |
+
- ๋ถ๋ฌธ ํฉ์ฐ๊ณผ ์ฐ๊ฒฐ ์์น๊ฐ ์ ํฉํ๋์ง ํ์ธ
|
| 34 |
+
- ๋ถ์ผ์น๊ฐ ์์ผ๋ฉด ๋ช
์์ ์ผ๋ก ์ง์ ํ๋ผ
|
| 35 |
+
|
| 36 |
+
### ์์น 5: ์๊ฐ์ถ์ผ๋ก ํ๋จํ๋ผ
|
| 37 |
+
- ๋จ์ผ ๋ถ๊ธฐ ์ค๋
์ท์ด ์๋๋ผ 3~5๋
์ถ์ธ๋ก ํ๋จ
|
| 38 |
+
- ์ผํ์ฑ๊ณผ ๋ฐ๋ณต์ฑ์ ๋ถ๋ฆฌ
|
| 39 |
+
- ์ฑ์ฅ์ด ์ ๊ธฐ์ ์ธ์ง ์ธ์์ ์ํ ๊ฒ์ธ์ง ๊ตฌ๋ถ
|
| 40 |
+
- ๋ฏธ๋ ์ถ์ ์ ๊ณผ๊ฑฐ ์ถ์ธ์ ์ฐ์ฅ์ด ์๋๋ผ ๊ตฌ์กฐ์ ๋ณํ๋ฅผ ๋ฐ์
|
| 41 |
+
|
| 42 |
+
### ์์น 6: ๋ฆฌ์คํฌ๋ฅผ ๋จผ์ ์ฐพ์๋ผ
|
| 43 |
+
- "์ด ํ์ฌ๊ฐ ์ ์ข์๊ฐ"๋ณด๋ค "๋ฌด์์ด ์๋ชป๋ ์ ์๋๊ฐ"๋ฅผ ๋จผ์ ํ์
|
| 44 |
+
- ๊ฐ์ฌ์๊ฒฌ ๋ณํ, ํน์๊ด๊ณ์ ๊ฑฐ๋, ํ๊ณ์ ์ฑ
๋ณ๊ฒฝ์ ์ฃผ์
|
| 45 |
+
- ๋ถ์ฑ ๋ง๊ธฐ ๊ตฌ์กฐ์ ์ด์๋ณด์๋ฐฐ์จ์ ํจ๊ป ํ์ธ
|
| 46 |
+
- ์ง์ค ๋ฆฌ์คํฌ(๋งค์ถ์ฒ, ๊ณต๊ธ์ฒ, ์ง์ญ)๋ฅผ ํ์
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
ANALYSIS_PHILOSOPHY_COMPACT = """\
|
| 50 |
+
## ๋ถ์ ์์น
|
| 51 |
+
1. ์ซ์ ๋ค์ "์?"๋ฅผ ์ถ์ (๋งค์ถ=๋ฌผ๋ร๋จ๊ฐร๋ฏน์ค, ๋น์ฉ=์๊ฐ+ํ๊ด๋น)
|
| 52 |
+
2. ์ด์ต์ ์ง ์์ฌ (CF vs NI, Accrual Ratio, ์ด์ ์๋ณธ ๋ณํ)
|
| 53 |
+
3. DuPont/๋ถ๋ฌธ/์ฑ๊ฒฉ๋ณ ๋ถํด โ ํฉ์ฐ๋ง ๋ณด๋ฉด ๊ตฌ์กฐ ๋ณํ๋ฅผ ๋์นจ
|
| 54 |
+
4. ๊ณต์ ์์ โ ์ฌ๋ฌด ์์น ๊ต์ฐจ๊ฒ์ฆ โ ๋ถ์ผ์น ์ ๋ช
์์ ์ง์
|
| 55 |
+
5. 3~5๋
์ถ์ธ ํ๋จ โ ์ผํ์ฑ vs ๋ฐ๋ณต์ฑ ๋ถ๋ฆฌ
|
| 56 |
+
6. "๋ฌด์์ด ์๋ชป๋ ์ ์๋๊ฐ?" ๋จผ์ ํ์ โ ๋ฆฌ์คํฌ ์ ํ
|
| 57 |
+
"""
|
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_124945.jsonl
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"caseId": "researchGather.structure.recentDisclosures", "persona": "research_gather", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 2 |
+
{"caseId": "accountant.costByNature.summary", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 3 |
+
{"caseId": "accountant.audit.redFlags", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 4 |
+
{"caseId": "investor.dividend.sustainability", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 5 |
+
{"caseId": "investor.downside.risks", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 6 |
+
{"caseId": "investor.distress.sdi", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 7 |
+
{"caseId": "analyst.margin.drivers", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 8 |
+
{"caseId": "analyst.segments.lgchem", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 9 |
+
{"caseId": "analyst.evidence.recentDisclosures", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 10 |
+
{"caseId": "accountant.ambiguous.costStructure", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 5.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 11 |
+
{"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 4.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 12 |
+
{"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 4.0, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 13 |
+
{"caseId": "investor.profitMargin.context", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 14 |
+
{"caseId": "investor.growth.cashflowTrend", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 15 |
+
{"caseId": "analyst.growth.futurePlan", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 16 |
+
{"caseId": "investor.growth.revenueGrowth", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 17 |
+
{"caseId": "analyst.valuation.perComparison", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 18 |
+
{"caseId": "investor.valuation.intrinsicValue", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 19 |
+
{"caseId": "analyst.valuation.roe", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 20 |
+
{"caseId": "investor.report.majorHolder", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 21 |
+
{"caseId": "accountant.report.executivePay", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 22 |
+
{"caseId": "analyst.context.evidenceCitation", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 23 |
+
{"caseId": "businessOwner.context.riskFactors", "persona": "business_owner", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 24 |
+
{"caseId": "investor.context.disclosureChange", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 25 |
+
{"caseId": "analyst.notes.rndExpense", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 26 |
+
{"caseId": "accountant.notes.tangibleAsset", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 27 |
+
{"caseId": "analyst.notes.segmentDetail", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.0, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 28 |
+
{"caseId": "accountant.edge.financialCompany", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 29 |
+
{"caseId": "accountant.cost.rndRatio", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 30 |
+
{"caseId": "analyst.cost.opexBreakdown", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 31 |
+
{"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 32 |
+
{"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 33 |
+
{"caseId": "investor.followup.deeperDividend", "persona": "investor", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 34 |
+
{"caseId": "analyst.followup.whyMarginDrop", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 6.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["clarification_failure", "retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
| 35 |
+
{"caseId": "accountant.stability.debtAnalysis", "persona": "accountant", "severity": "high", "provider": "ollama", "model": null, "overall": 6.5, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["empty_answer", "runtime_error"], "answerLength": 0, "timestamp": "20260327_124945"}
|
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_131602.jsonl
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.0, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.0, "failureTypes": ["generation_failure"], "answerLength": 0, "timestamp": "20260327_131602"}
|
| 2 |
+
{"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 8.727272727272727, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.8181818181818182, "failureTypes": [], "answerLength": 739, "timestamp": "20260327_131602"}
|
| 3 |
+
{"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": null, "overall": 10.083333333333332, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 687, "timestamp": "20260327_131602"}
|
| 4 |
+
{"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 918, "timestamp": "20260327_131602"}
|
src/dartlab/ai/eval/batchResults/batch_ollama_20260327_132810.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"caseId": "analyst.margin.drivers", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.083333333333334, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 186, "timestamp": "20260327_132810"}
|
| 2 |
+
{"caseId": "analyst.segments.lgchem", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.25, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 407, "timestamp": "20260327_132810"}
|
| 3 |
+
{"caseId": "analyst.evidence.recentDisclosures", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.166666666666666, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 310, "timestamp": "20260327_132810"}
|
| 4 |
+
{"caseId": "analyst.growth.futurePlan", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 8.5, "routeMatch": 1.0, "moduleUtilization": 0.5, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 319, "timestamp": "20260327_132810"}
|
| 5 |
+
{"caseId": "analyst.valuation.perComparison", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_132810"}
|
| 6 |
+
{"caseId": "analyst.valuation.roe", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 10.537878787878789, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 375, "timestamp": "20260327_132810"}
|
| 7 |
+
{"caseId": "analyst.context.evidenceCitation", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.916666666666668, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 804, "timestamp": "20260327_132810"}
|
| 8 |
+
{"caseId": "analyst.notes.rndExpense", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.291666666666666, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 61, "timestamp": "20260327_132810"}
|
| 9 |
+
{"caseId": "analyst.notes.segmentDetail", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 5.5, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure", "runtime_error"], "answerLength": 0, "timestamp": "20260327_132810"}
|
| 10 |
+
{"caseId": "analyst.cost.opexBreakdown", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 9.0, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 235, "timestamp": "20260327_132810"}
|
| 11 |
+
{"caseId": "analyst.followup.whyMarginDrop", "persona": "analyst", "severity": "high", "provider": "ollama", "model": null, "overall": 10.333333333333334, "routeMatch": 1.0, "moduleUtilization": 0.6666666666666666, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["clarification_failure", "retrieval_failure"], "answerLength": 872, "timestamp": "20260327_132810"}
|
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_124945.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Eval 진단 리포트 — 2026-03-27 12:49
|
| 2 |
+
|
| 3 |
+
## ์ฝ์ ์ ํ (ํ์ ์ ์)
|
| 4 |
+
|
| 5 |
+
| ์ ํ | ํ๊ท ์ ์ | ์ผ์ด์ค ์ | ์ฃผ์ ์คํจ |
|
| 6 |
+
|------|---------|---------|---------|
|
| 7 |
+
| unknown | 5.98 | 35 | runtime_error, retrieval_failure, empty_answer |
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# 개선 계획 (Remediation)
|
| 11 |
+
|
| 12 |
+
| ์ฐ์ ์์ | Failure | ๋์ ํ์ผ | ์ค๋ช
| ์ํฅ๋ |
|
| 13 |
+
|---------|---------|----------|------|-------|
|
| 14 |
+
| P1 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES ๋งคํ์ ๋ชจ๋ ์ถ๊ฐ (๋ฐ์ 20ํ) | high |
|
| 15 |
+
| P3 | generation_failure | `engines/ai/conversation/templates/analysis_rules.py` | ๋ถ์ ๊ท์น์ few-shot ์์ ์ถ๊ฐ (๋ฐ์ 2ํ) | medium |
|
| 16 |
+
| P4 | clarification_failure | `engines/ai/conversation/system_base.py` | clarification ์ ์ฑ
์กฐ๊ฑด ์์ (๋ฐ์ 1ํ) | low |
|
| 17 |
+
| P5 | empty_answer | `(๋งคํ ์์)` | ์ failure ์ ํ โ ๋งคํ ์ถ๊ฐ ํ์ (๋ฐ์ 15ํ) | unknown |
|
| 18 |
+
| P5 | runtime_error | `(๋งคํ ์์)` | ์ failure ์ ํ โ ๋งคํ ์ถ๊ฐ ํ์ (๋ฐ์ 35ํ) | unknown |
|
| 19 |
+
|
| 20 |
+
**์ฆ์ ์กฐ์น ํ์**: 1๊ฑด
|
| 21 |
+
- [retrieval_failure] โ `engines/ai/context/finance_context.py`
|
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260327_131602.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Eval 진단 리포트 — 2026-03-27 13:16
|
| 2 |
+
|
| 3 |
+
## ์ฝ์ ์ ํ (ํ์ ์ ์)
|
| 4 |
+
|
| 5 |
+
| ์ ํ | ํ๊ท ์ ์ | ์ผ์ด์ค ์ | ์ฃผ์ ์คํจ |
|
| 6 |
+
|------|---------|---------|---------|
|
| 7 |
+
| unknown | 7.33 | 4 | generation_failure, retrieval_failure |
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# 개선 계획 (Remediation)
|
| 11 |
+
|
| 12 |
+
| ์ฐ์ ์์ | Failure | ๋์ ํ์ผ | ์ค๋ช
| ์ํฅ๋ |
|
| 13 |
+
|---------|---------|----------|------|-------|
|
| 14 |
+
| P3 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES ๋งคํ์ ๋ชจ๋ ์ถ๊ฐ (๋ฐ์ 1ํ) | high |
|
| 15 |
+
| P4 | generation_failure | `engines/ai/conversation/templates/analysis_rules.py` | ๋ถ์ ๊ท์น์ few-shot ์์ ์ถ๊ฐ (๋ฐ์ 1ํ) | medium |
|
src/dartlab/ai/memory/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์ธ์
๊ฐ ๋ถ์ ๋ฉ๋ชจ๋ฆฌ โ SQLite ๊ธฐ๋ฐ.
|
| 2 |
+
|
| 3 |
+
์ข
๋ชฉ๋ณ ๋ถ์ ํ์คํ ๋ฆฌ๋ฅผ ์์ํ์ฌ ์ฌ๋ถ์ ์ ์ด์ ๋งฅ๋ฝ์ ํ์ฉํ๋ค.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from dartlab.ai.memory.store import AnalysisMemory
|
| 7 |
+
|
| 8 |
+
__all__ = ["AnalysisMemory"]
|
src/dartlab/ai/memory/store.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋ถ์ ๋ฉ๋ชจ๋ฆฌ ์ ์ฅ์ โ SQLite ๊ธฐ๋ฐ ์ธ์
๊ฐ ์์.
|
| 2 |
+
|
| 3 |
+
Company ๊ฐ์ฒด(200~500MB)๋ ์ ์ฅํ์ง ์๋๋ค.
|
| 4 |
+
stockCode + ์์ + ์ง๋ฌธ ์์ฝ + ๊ฒฐ๊ณผ ์์ฝ๋ง ์ ์ฅํ์ฌ ๋ฉ๋ชจ๋ฆฌ ์์ .
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import sqlite3
|
| 10 |
+
import time
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
_DB_FILENAME = "analysisMemory.db"
_MAX_DB_SIZE_MB = 50
_MAX_SUMMARY_CHARS = 500

# Process-wide singleton, created lazily by getMemory().
_instance: AnalysisMemory | None = None


@dataclass(frozen=True)
class MemoryRecord:
    """One stored analysis record, as returned by AnalysisMemory.recallForStock."""

    stockCode: str
    question: str
    questionType: str
    resultSummary: str
    timestamp: float  # time.time() value captured when the record was saved
    grade: str | None = None  # None when no grade was stored for the record


class AnalysisMemory:
    """SQLite-backed store of per-stock analysis history.

    Each record holds the stock code, a truncated question, the question
    type, a truncated result summary, an optional grade and a timestamp.
    The database connection is opened lazily on first use.
    """

    def __init__(self, dbPath: Path | None = None, maxSizeMb: float = _MAX_DB_SIZE_MB) -> None:
        """Configure the store without touching the filesystem.

        Args:
            dbPath: Location of the SQLite file. Defaults to
                ``~/.dartlab/analysisMemory.db``.
            maxSizeMb: File size (in MiB) above which the oldest records are
                pruned after a save.
        """
        if dbPath is None:
            dbPath = Path.home() / ".dartlab" / _DB_FILENAME
        self._dbPath = dbPath
        self._maxSizeMb = maxSizeMb
        self._conn: sqlite3.Connection | None = None

    def _ensureDb(self) -> sqlite3.Connection:
        """Return the open connection, creating the file and schema on first use."""
        if self._conn is not None:
            return self._conn

        self._dbPath.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self._dbPath), timeout=5)
        try:
            conn.execute(
                """CREATE TABLE IF NOT EXISTS analysis (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    stockCode TEXT NOT NULL,
                    question TEXT NOT NULL,
                    questionType TEXT DEFAULT '',
                    resultSummary TEXT DEFAULT '',
                    grade TEXT DEFAULT '',
                    timestamp REAL NOT NULL
                )"""
            )
            conn.execute("CREATE INDEX IF NOT EXISTS idx_stock ON analysis(stockCode)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON analysis(timestamp)")
            conn.commit()
        except sqlite3.Error:
            # Do not leak a half-initialised connection when schema setup fails.
            conn.close()
            raise
        self._conn = conn
        return conn

    def saveAnalysis(
        self,
        stockCode: str,
        question: str,
        questionType: str = "",
        resultSummary: str = "",
        grade: str | None = None,
    ) -> None:
        """Persist one analysis result.

        The question is truncated to 200 characters and the summary to
        ``_MAX_SUMMARY_CHARS``; a missing grade is stored as an empty string.
        After the insert is committed the size limit is enforced.
        """
        conn = self._ensureDb()
        summary = resultSummary[:_MAX_SUMMARY_CHARS] if resultSummary else ""
        conn.execute(
            "INSERT INTO analysis (stockCode, question, questionType, resultSummary, grade, timestamp) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (stockCode, question[:200], questionType, summary, grade or "", time.time()),
        )
        conn.commit()
        self._enforceSizeLimit(conn)

    def recallForStock(
        self,
        stockCode: str,
        limit: int = 5,
        decayDays: int = 90,
    ) -> list[MemoryRecord]:
        """Return the most recent records for a stock, newest first.

        Args:
            stockCode: Stock code to look up.
            limit: Maximum number of records returned.
            decayDays: Records older than this many days are ignored.
        """
        conn = self._ensureDb()
        cutoff = time.time() - (decayDays * 86400)
        rows = conn.execute(
            "SELECT stockCode, question, questionType, resultSummary, timestamp, grade "
            "FROM analysis WHERE stockCode = ? AND timestamp > ? "
            "ORDER BY timestamp DESC LIMIT ?",
            (stockCode, cutoff, limit),
        ).fetchall()
        return [
            MemoryRecord(
                stockCode=row[0],
                question=row[1],
                questionType=row[2],
                resultSummary=row[3],
                timestamp=row[4],
                grade=row[5] or None,  # empty string in the DB means "no grade"
            )
            for row in rows
        ]

    def toPromptContext(self, stockCode: str) -> str:
        """Render the recalled records as a markdown snippet for a prompt.

        Returns an empty string when there is no history for the stock.
        """
        import datetime

        records = self.recallForStock(stockCode)
        if not records:
            return ""
        lines = ["## 이전 분석 기록"]
        for record in records:
            day = datetime.datetime.fromtimestamp(record.timestamp).strftime("%Y-%m-%d")
            gradeText = f" [등급: {record.grade}]" if record.grade else ""
            lines.append(f"- **{day}** ({record.questionType}){gradeText}: {record.question}")
            if record.resultSummary:
                # NOTE(review): the separator glyph was unreadable in the reviewed
                # copy of this file; an arrow is assumed here, confirm upstream.
                lines.append(f" → {record.resultSummary[:200]}")
        return "\n".join(lines)

    def _enforceSizeLimit(self, conn: sqlite3.Connection) -> None:
        """Prune the 100 oldest records when the DB file exceeds the size limit."""
        try:
            sizeMb = self._dbPath.stat().st_size / (1024 * 1024)
        except OSError:
            # Size check is best-effort; skip pruning if the file cannot be stat'ed.
            return
        if sizeMb <= self._maxSizeMb:
            return
        conn.execute(
            "DELETE FROM analysis WHERE id IN (SELECT id FROM analysis ORDER BY timestamp ASC LIMIT 100)"
        )
        # VACUUM cannot run inside a transaction, and the DELETE above opened
        # one implicitly, so it must be committed first.
        conn.commit()
        conn.execute("VACUUM")

    def close(self) -> None:
        """Close the connection if one is open; safe to call repeatedly."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None


def getMemory() -> AnalysisMemory:
    """Return the shared AnalysisMemory instance, creating it on first call."""
    global _instance
    if _instance is None:
        _instance = AnalysisMemory()
    return _instance
|
src/dartlab/ai/memory/summarizer.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋ถ์ ๊ฒฐ๊ณผ ์์ฝ๊ธฐ โ ๊ท์น ๊ธฐ๋ฐ (LLM ํธ์ถ ์์ด).
|
| 2 |
+
|
| 3 |
+
LLM ๋ต๋ณ์์ ์ ์ฅ์ฉ ์์ฝ์ ์ถ์ถํ๋ค.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def summarizeResponse(response: str, maxChars: int = 500) -> str:
    """Extract a short summary from an LLM answer without calling an LLM.

    Strategy, in order:
      1. The body of the first markdown section whose heading starts with a
         conclusion-like keyword (종합, 결론, 요약, 판단, Bull, 개선).
      2. The last paragraph that is not a table row or a code fence.
      3. The last paragraph, whatever it is.
      4. The whole response.

    Args:
        response: Full answer text; an empty value yields "".
        maxChars: Upper bound passed on to ``_cleanText``.

    Returns:
        The selected text after markdown clean-up and length limiting.
    """
    if not response:
        return ""

    # 1. Try a conclusion/summary section: heading line, then everything up to
    #    the next "##" heading or the end of the text.
    conclusionMatch = re.search(
        r"(?:##?\s*(?:종합|결론|요약|판단|Bull|개선).*?\n)(.*?)(?:\n##|\Z)",
        response,
        re.DOTALL,
    )
    if conclusionMatch:
        return _cleanText(conclusionMatch.group(1).strip(), maxChars)

    # 2. Fall back to the last prose paragraph (skip tables and code fences).
    paragraphs = [p.strip() for p in response.split("\n\n") if p.strip()]
    if paragraphs:
        for paragraph in reversed(paragraphs):
            if not paragraph.startswith(("|", "```")):
                return _cleanText(paragraph, maxChars)
        # Every paragraph was a table or code block; use the last one anyway.
        return _cleanText(paragraphs[-1], maxChars)

    # Only reached for whitespace-only responses.
    return _cleanText(response, maxChars)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def extractGrade(response: str) -> str | None:
|
| 40 |
+
"""์๋ต์์ ๋ฑ๊ธ ์ ๋ณด ์ถ์ถ."""
|
| 41 |
+
# "์ข
ํฉ ๋ฑ๊ธ: B+" ๊ฐ์ ํจํด
|
| 42 |
+
gradeMatch = re.search(r"์ข
ํฉ\s*(?:๋ฑ๊ธ|์ ์)\s*[:๏ผ]\s*([A-F][+-]?)", response)
|
| 43 |
+
if gradeMatch:
|
| 44 |
+
return gradeMatch.group(1)
|
| 45 |
+
return None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _cleanText(text: str, maxChars: int) -> str:
|
| 49 |
+
"""๋งํฌ๋ค์ด ์ ๋ฆฌ + ๊ธธ์ด ์ ํ."""
|
| 50 |
+
# ๋งํฌ๋ค์ด ํค๋, ๋ณผ๋, ์ด๋ชจ์ง ์ ๊ฑฐ
|
| 51 |
+
cleaned = re.sub(r"[#*_`]", "", text)
|
| 52 |
+
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
| 53 |
+
if len(cleaned) > maxChars:
|
| 54 |
+
return cleaned[: maxChars - 3] + "..."
|
| 55 |
+
return cleaned
|
src/dartlab/ai/providers/oauth_codex.py
CHANGED
|
@@ -29,22 +29,68 @@ log = logging.getLogger(__name__)
|
|
| 29 |
CODEX_API_BASE = "https://chatgpt.com/backend-api"
|
| 30 |
CODEX_RESPONSES_PATH = "/codex/responses"
|
| 31 |
|
| 32 |
-
|
| 33 |
"gpt-5.4",
|
| 34 |
-
"gpt-5.3",
|
| 35 |
"gpt-5.3-codex",
|
| 36 |
-
"gpt-5.2",
|
| 37 |
"gpt-5.2-codex",
|
| 38 |
-
"gpt-5.1",
|
| 39 |
-
"gpt-5.1-codex",
|
| 40 |
-
"gpt-5.1-codex-mini",
|
| 41 |
-
"o3",
|
| 42 |
-
"o4-mini",
|
| 43 |
-
"gpt-4.1",
|
| 44 |
-
"gpt-4.1-mini",
|
| 45 |
-
"gpt-4.1-nano",
|
| 46 |
]
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
class ChatGPTOAuthError(Exception):
|
| 50 |
"""ChatGPT OAuth provider ์๋ฌ โ action ํ๋๋ก ์ฌ์ฉ์ ๋์ ์๋ด."""
|
|
|
|
| 29 |
CODEX_API_BASE = "https://chatgpt.com/backend-api"
|
| 30 |
CODEX_RESPONSES_PATH = "/codex/responses"
|
| 31 |
|
| 32 |
+
# Model identifiers shipped with the package.
_BUNDLED_MODELS = [
    "gpt-5.4",
    "gpt-5.3-codex",
    "gpt-5.2-codex",
    "gpt-5.1-codex-max",
]

# Most recently resolved model list; None until first resolved.
_MODELS_CACHE: list[str] | None = None
# Timestamp associated with _MODELS_CACHE (0.0 before the first fill).
_MODELS_CACHE_TS: float = 0.0
_MODELS_CACHE_TTL = 300.0  # 5 minutes
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _fetchRemoteModels(token: str) -> list[str] | None:
    """Query the remote ``/codex/models`` endpoint for usable model ids.

    Args:
        token: Bearer token sent in the Authorization header.

    Returns:
        The list of model ids, or None when the request fails, the status is
        not 200, the payload has an unexpected shape, or no ids are found.
    """
    url = f"{CODEX_API_BASE}/codex/models"
    headers = {
        "Authorization": f"Bearer {token}",
        "originator": "codex_cli_rs",
    }
    accountId = oauthToken.get_account_id()
    if accountId:
        headers["chatgpt-account-id"] = accountId

    try:
        resp = requests.get(url, headers=headers, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (requests.RequestException, json.JSONDecodeError, ValueError):
        return None

    # Accept either a bare list or an object wrapping it under "models"/"data".
    if isinstance(data, dict):
        items = data.get("models", data.get("data", []))
    else:
        items = data
    if not isinstance(items, list):
        # Unexpected payload shape (scalar, null, ...): report "no result"
        # so the caller can fall back instead of raising.
        return None

    models = []
    for item in items:
        if isinstance(item, dict):
            modelId = item.get("id") or item.get("model")
        elif item is None:
            continue
        else:
            modelId = str(item)
        if modelId:
            models.append(modelId)
    return models if models else None
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def availableModels() -> list[str]:
    """Return the usable model list: cache first, then remote, then bundled.

    A non-empty cached list younger than ``_MODELS_CACHE_TTL`` seconds is
    returned as is. Otherwise a token is requested and the remote list is
    fetched; when no token or no remote list is available, a copy of
    ``_BUNDLED_MODELS`` is used. Whatever is chosen is cached with the
    current timestamp.
    """
    import time

    global _MODELS_CACHE, _MODELS_CACHE_TS
    timestamp = time.time()
    cacheAge = timestamp - _MODELS_CACHE_TS
    if _MODELS_CACHE and cacheAge < _MODELS_CACHE_TTL:
        return _MODELS_CACHE

    try:
        accessToken = oauthToken.get_valid_token()
    except (TokenRefreshError, OSError):
        # No usable credentials: skip the remote lookup.
        accessToken = None

    fetched = _fetchRemoteModels(accessToken) if accessToken else None
    _MODELS_CACHE = fetched if fetched else list(_BUNDLED_MODELS)
    _MODELS_CACHE_TS = timestamp
    return _MODELS_CACHE
|
| 93 |
+
|
| 94 |
|
| 95 |
class ChatGPTOAuthError(Exception):
|
| 96 |
"""ChatGPT OAuth provider ์๋ฌ โ action ํ๋๋ก ์ฌ์ฉ์ ๋์ ์๋ด."""
|
src/dartlab/ai/runtime/agent.py
CHANGED
|
@@ -10,6 +10,7 @@ import json
|
|
| 10 |
from typing import Any, Callable, Generator
|
| 11 |
|
| 12 |
from dartlab.ai.providers.base import BaseProvider
|
|
|
|
| 13 |
from dartlab.ai.tools.registry import (
|
| 14 |
build_tool_runtime,
|
| 15 |
)
|
|
@@ -51,6 +52,7 @@ def agent_loop(
|
|
| 51 |
"""
|
| 52 |
tool_runtime = runtime or build_tool_runtime(company, name="agent-loop")
|
| 53 |
tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
|
|
|
|
| 54 |
|
| 55 |
last_answer = ""
|
| 56 |
|
|
@@ -66,10 +68,17 @@ def agent_loop(
|
|
| 66 |
|
| 67 |
# ๋๊ตฌ ์คํ + ๊ฒฐ๊ณผ ์ถ๊ฐ
|
| 68 |
for tc in response.tool_calls:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
if on_tool_call:
|
| 70 |
on_tool_call(tc.name, tc.arguments)
|
| 71 |
|
| 72 |
result = tool_runtime.execute_tool(tc.name, tc.arguments)
|
|
|
|
| 73 |
|
| 74 |
if on_tool_result:
|
| 75 |
on_tool_result(tc.name, result)
|
|
@@ -92,6 +101,21 @@ _REFLECTION_PROMPT = (
|
|
| 92 |
)
|
| 93 |
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str:
|
| 96 |
"""๋ต๋ณ ์์ฒด ๊ฒ์ฆ โ 1ํ reflection์ผ๋ก ํ์ง ๋ณด์."""
|
| 97 |
reflect_messages = [
|
|
@@ -123,6 +147,7 @@ def agent_loop_stream(
|
|
| 123 |
"""
|
| 124 |
tool_runtime = runtime or build_tool_runtime(company, name="agent-stream")
|
| 125 |
tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
|
|
|
|
| 126 |
|
| 127 |
# ๋ํํ ์ง๋ฌธ์ ์ฒซ ํด ๋๊ตฌ ๊ฐ์ ์ ํจ
|
| 128 |
_isConversation = question_type in ("๋ํ", "๋ฉํ")
|
|
@@ -157,10 +182,17 @@ def agent_loop_stream(
|
|
| 157 |
messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
|
| 158 |
|
| 159 |
for tc in response.tool_calls:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
if on_tool_call:
|
| 161 |
on_tool_call(tc.name, tc.arguments)
|
| 162 |
|
| 163 |
result = tool_runtime.execute_tool(tc.name, tc.arguments)
|
|
|
|
| 164 |
|
| 165 |
if on_tool_result:
|
| 166 |
on_tool_result(tc.name, result)
|
|
@@ -259,7 +291,7 @@ def agent_loop_planning(
|
|
| 259 |
steps = plan.get("steps", [])[:max_steps]
|
| 260 |
|
| 261 |
# 2๋จ๊ณ: ๊ณํ ์์ฐจ ์คํ
|
| 262 |
-
|
| 263 |
for step in steps:
|
| 264 |
tool_name = step.get("tool", "")
|
| 265 |
args = step.get("args", {})
|
|
@@ -268,17 +300,13 @@ def agent_loop_planning(
|
|
| 268 |
on_tool_call(tool_name, args)
|
| 269 |
|
| 270 |
result = tool_runtime.execute_tool(tool_name, args)
|
|
|
|
| 271 |
|
| 272 |
if on_tool_result:
|
| 273 |
on_tool_result(tool_name, result)
|
| 274 |
|
| 275 |
-
results.append({"tool": tool_name, "result": result[:3000]})
|
| 276 |
-
|
| 277 |
# 3๋จ๊ณ: ์ข
ํฉ ๋ต๋ณ ์์ฑ
|
| 278 |
-
synthesis_parts = [f"์ง๋ฌธ: {question}", "", "## ์์ง๋ ๋ฐ์ดํฐ:"]
|
| 279 |
-
for r in results:
|
| 280 |
-
synthesis_parts.append(f"\n### {r['tool']}")
|
| 281 |
-
synthesis_parts.append(r["result"])
|
| 282 |
synthesis_parts.append("\n## ์ง์์ฌํญ:")
|
| 283 |
synthesis_parts.append(
|
| 284 |
"์ ๋ฐ์ดํฐ๋ฅผ ์ข
ํฉํ์ฌ ์ฌ์ฉ์ ์ง๋ฌธ์ ๋ํ ๊ตฌ์กฐํ๋ ๋ต๋ณ์ ์์ฑํ์ธ์. "
|
|
@@ -291,3 +319,92 @@ def agent_loop_planning(
|
|
| 291 |
]
|
| 292 |
final_resp = provider.complete(synth_messages)
|
| 293 |
return final_resp.answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from typing import Any, Callable, Generator
|
| 11 |
|
| 12 |
from dartlab.ai.providers.base import BaseProvider
|
| 13 |
+
from dartlab.ai.runtime.scratchpad import Scratchpad
|
| 14 |
from dartlab.ai.tools.registry import (
|
| 15 |
build_tool_runtime,
|
| 16 |
)
|
|
|
|
| 52 |
"""
|
| 53 |
tool_runtime = runtime or build_tool_runtime(company, name="agent-loop")
|
| 54 |
tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
|
| 55 |
+
pad = Scratchpad()
|
| 56 |
|
| 57 |
last_answer = ""
|
| 58 |
|
|
|
|
| 68 |
|
| 69 |
# ๋๊ตฌ ์คํ + ๊ฒฐ๊ณผ ์ถ๊ฐ
|
| 70 |
for tc in response.tool_calls:
|
| 71 |
+
# ์ค๋ณต ํธ์ถ ๋ฐฉ์ง
|
| 72 |
+
warning = pad.getDuplicateWarning(tc.name)
|
| 73 |
+
if warning:
|
| 74 |
+
messages.append(provider.format_tool_result(tc.id, warning))
|
| 75 |
+
continue
|
| 76 |
+
|
| 77 |
if on_tool_call:
|
| 78 |
on_tool_call(tc.name, tc.arguments)
|
| 79 |
|
| 80 |
result = tool_runtime.execute_tool(tc.name, tc.arguments)
|
| 81 |
+
pad.addEntry(tc.name, tc.arguments, result)
|
| 82 |
|
| 83 |
if on_tool_result:
|
| 84 |
on_tool_result(tc.name, result)
|
|
|
|
| 101 |
)
|
| 102 |
|
| 103 |
|
| 104 |
+
def _buildReflectionPrompt(questionType: str | None = None) -> str:
|
| 105 |
+
"""์คํฌ checkpoints๊ฐ ์์ผ๋ฉด reflection ํ๋กฌํํธ์ ์ถ๊ฐ."""
|
| 106 |
+
base = _REFLECTION_PROMPT
|
| 107 |
+
try:
|
| 108 |
+
from dartlab.ai.skills.registry import matchSkill
|
| 109 |
+
|
| 110 |
+
skill = matchSkill("", questionType=questionType)
|
| 111 |
+
if skill and skill.checkpoints:
|
| 112 |
+
checks = "\n".join(f"- {c}" for c in skill.checkpoints)
|
| 113 |
+
return base + f"\n\n**์ถ๊ฐ ๊ฒ์ฆ ๊ธฐ์ค ({skill.name}):**\n{checks}"
|
| 114 |
+
except Exception:
|
| 115 |
+
pass
|
| 116 |
+
return base
|
| 117 |
+
|
| 118 |
+
|
| 119 |
def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str:
|
| 120 |
"""๋ต๋ณ ์์ฒด ๊ฒ์ฆ โ 1ํ reflection์ผ๋ก ํ์ง ๋ณด์."""
|
| 121 |
reflect_messages = [
|
|
|
|
| 147 |
"""
|
| 148 |
tool_runtime = runtime or build_tool_runtime(company, name="agent-stream")
|
| 149 |
tools = selectTools(tool_runtime, questionType=question_type, maxTools=max_tools, hasCompany=company is not None)
|
| 150 |
+
pad = Scratchpad()
|
| 151 |
|
| 152 |
# ๋ํํ ์ง๋ฌธ์ ์ฒซ ํด ๋๊ตฌ ๊ฐ์ ์ ํจ
|
| 153 |
_isConversation = question_type in ("๋ํ", "๋ฉํ")
|
|
|
|
| 182 |
messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
|
| 183 |
|
| 184 |
for tc in response.tool_calls:
|
| 185 |
+
# ์ค๋ณต ํธ์ถ ๋ฐฉ์ง
|
| 186 |
+
warning = pad.getDuplicateWarning(tc.name)
|
| 187 |
+
if warning:
|
| 188 |
+
messages.append(provider.format_tool_result(tc.id, warning))
|
| 189 |
+
continue
|
| 190 |
+
|
| 191 |
if on_tool_call:
|
| 192 |
on_tool_call(tc.name, tc.arguments)
|
| 193 |
|
| 194 |
result = tool_runtime.execute_tool(tc.name, tc.arguments)
|
| 195 |
+
pad.addEntry(tc.name, tc.arguments, result)
|
| 196 |
|
| 197 |
if on_tool_result:
|
| 198 |
on_tool_result(tc.name, result)
|
|
|
|
| 291 |
steps = plan.get("steps", [])[:max_steps]
|
| 292 |
|
| 293 |
# 2๋จ๊ณ: ๊ณํ ์์ฐจ ์คํ
|
| 294 |
+
pad = Scratchpad()
|
| 295 |
for step in steps:
|
| 296 |
tool_name = step.get("tool", "")
|
| 297 |
args = step.get("args", {})
|
|
|
|
| 300 |
on_tool_call(tool_name, args)
|
| 301 |
|
| 302 |
result = tool_runtime.execute_tool(tool_name, args)
|
| 303 |
+
pad.addEntry(tool_name, args, result)
|
| 304 |
|
| 305 |
if on_tool_result:
|
| 306 |
on_tool_result(tool_name, result)
|
| 307 |
|
|
|
|
|
|
|
| 308 |
# 3๋จ๊ณ: ์ข
ํฉ ๋ต๋ณ ์์ฑ
|
| 309 |
+
synthesis_parts = [f"์ง๋ฌธ: {question}", "", "## ์์ง๋ ๋ฐ์ดํฐ:", pad.toContext()]
|
|
|
|
|
|
|
|
|
|
| 310 |
synthesis_parts.append("\n## ์ง์์ฌํญ:")
|
| 311 |
synthesis_parts.append(
|
| 312 |
"์ ๋ฐ์ดํฐ๋ฅผ ์ข
ํฉํ์ฌ ์ฌ์ฉ์ ์ง๋ฌธ์ ๋ํ ๊ตฌ์กฐํ๋ ๋ต๋ณ์ ์์ฑํ์ธ์. "
|
|
|
|
| 319 |
]
|
| 320 |
final_resp = provider.complete(synth_messages)
|
| 321 |
return final_resp.answer
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 325 |
+
# ์์จ ํ์ ์์ด์ ํธ (Tier 2 โ ์์ ๋ถ์)
|
| 326 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 327 |
+
|
| 328 |
+
_SUFFICIENCY_HINT = (
|
| 329 |
+
"\n\n---\n"
|
| 330 |
+
"**์๋ด**: ์ถฉ๋ถํ ๋ฐ์ดํฐ๋ฅผ ์์งํ๋ค๋ฉด ๋๊ตฌ๋ฅผ ๋ ํธ์ถํ์ง ๋ง๊ณ ์ต์ข
๋ต๋ณ์ ์์ฑํ์ธ์. "
|
| 331 |
+
"์์ง ๋ถ์กฑํ๋ฉด ์ถ๊ฐ ๋๊ตฌ๋ฅผ ํธ์ถํ์ธ์."
|
| 332 |
+
)
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def agentLoopAutonomous(
|
| 336 |
+
provider: BaseProvider,
|
| 337 |
+
messages: list[dict],
|
| 338 |
+
company: Any,
|
| 339 |
+
*,
|
| 340 |
+
maxTurns: int = 15,
|
| 341 |
+
maxTools: int | None = None,
|
| 342 |
+
runtime: ToolRuntime | None = None,
|
| 343 |
+
onToolCall: Callable[[str, dict], None] | None = None,
|
| 344 |
+
onToolResult: Callable[[str, str], None] | None = None,
|
| 345 |
+
questionType: str | None = None,
|
| 346 |
+
forceToolFirstTurn: bool = True,
|
| 347 |
+
) -> Generator[str, None, None]:
|
| 348 |
+
"""์์จ ํ์ ์์ด์ ํธ: LLM์ด ์ถฉ๋ถํ๋ค๊ณ ํ๋จํ ๋๊น์ง ๋๊ตฌ ํธ์ถ.
|
| 349 |
+
|
| 350 |
+
Phase 1 Scratchpad + Phase 4 Skill์ ํ์ฉํ์ฌ
|
| 351 |
+
report_mode์์ ๊น์ด ์๋ ๋ถ์์ ์ํํ๋ค.
|
| 352 |
+
"""
|
| 353 |
+
tool_runtime = runtime or build_tool_runtime(company, name="agent-autonomous")
|
| 354 |
+
tools = selectTools(tool_runtime, questionType=questionType, maxTools=maxTools, hasCompany=company is not None)
|
| 355 |
+
pad = Scratchpad(tokenBudget=12000)
|
| 356 |
+
|
| 357 |
+
_isConversation = questionType in ("๋ํ", "๋ฉํ")
|
| 358 |
+
|
| 359 |
+
for _turn in range(maxTurns):
|
| 360 |
+
kwargs: dict = {}
|
| 361 |
+
if _turn == 0 and forceToolFirstTurn and not _isConversation and company is not None:
|
| 362 |
+
kwargs["tool_choice"] = "any"
|
| 363 |
+
|
| 364 |
+
try:
|
| 365 |
+
response = provider.complete_with_tools(messages, tools, **kwargs)
|
| 366 |
+
except TypeError:
|
| 367 |
+
response = provider.complete_with_tools(messages, tools)
|
| 368 |
+
|
| 369 |
+
if not response.tool_calls:
|
| 370 |
+
if _turn == 0:
|
| 371 |
+
yield from provider.stream(messages)
|
| 372 |
+
return
|
| 373 |
+
if response.answer and response.answer.strip():
|
| 374 |
+
yield response.answer
|
| 375 |
+
else:
|
| 376 |
+
yield from provider.stream(messages)
|
| 377 |
+
return
|
| 378 |
+
|
| 379 |
+
messages.append(provider.format_assistant_tool_calls(response.answer, response.tool_calls))
|
| 380 |
+
|
| 381 |
+
for tc in response.tool_calls:
|
| 382 |
+
warning = pad.getDuplicateWarning(tc.name)
|
| 383 |
+
if warning:
|
| 384 |
+
messages.append(provider.format_tool_result(tc.id, warning))
|
| 385 |
+
continue
|
| 386 |
+
|
| 387 |
+
if onToolCall:
|
| 388 |
+
onToolCall(tc.name, tc.arguments)
|
| 389 |
+
|
| 390 |
+
result = tool_runtime.execute_tool(tc.name, tc.arguments)
|
| 391 |
+
pad.addEntry(tc.name, tc.arguments, result)
|
| 392 |
+
|
| 393 |
+
if onToolResult:
|
| 394 |
+
onToolResult(tc.name, result)
|
| 395 |
+
|
| 396 |
+
messages.append(provider.format_tool_result(tc.id, result))
|
| 397 |
+
|
| 398 |
+
# 3ํด ์ดํ๋ถํฐ ์ถฉ๋ถ์ฑ ํํธ + ์ฌ์ฉ ํํฉ์ user ๋ฉ์์ง๋ก ์ถ๊ฐ
|
| 399 |
+
if _turn >= 2:
|
| 400 |
+
usageSummary = pad.getUsageSummary()
|
| 401 |
+
messages.append({"role": "user", "content": usageSummary + _SUFFICIENCY_HINT})
|
| 402 |
+
|
| 403 |
+
# maxTurns ๋๋ฌ โ ์ต์ข
์ข
ํฉ ์์ฒญ
|
| 404 |
+
synthPrompt = (
|
| 405 |
+
f"๋๊ตฌ ํธ์ถ์ด ์ต๋ {maxTurns}ํด์ ๋๋ฌํ์ต๋๋ค. "
|
| 406 |
+
"์ง๊ธ๊น์ง ์์งํ ๋ฐ์ดํฐ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์ต์ข
์ข
ํฉ ๋ต๋ณ์ ์์ฑํ์ธ์.\n\n"
|
| 407 |
+
f"{pad.getUsageSummary()}"
|
| 408 |
+
)
|
| 409 |
+
messages.append({"role": "user", "content": synthPrompt})
|
| 410 |
+
yield from provider.stream(messages)
|
src/dartlab/ai/runtime/core.py
CHANGED
|
@@ -19,6 +19,7 @@ dartlab.ask(), server UI, CLI๊ฐ ๋ชจ๋ ์ด ์ฝ์ด๋ฅผ ์๋นํ๋ค.
|
|
| 19 |
|
| 20 |
from __future__ import annotations
|
| 21 |
|
|
|
|
| 22 |
from typing import Any, Generator
|
| 23 |
|
| 24 |
from dartlab.ai.runtime.events import AnalysisEvent
|
|
@@ -30,6 +31,7 @@ from dartlab.ai.runtime.post_processing import (
|
|
| 30 |
)
|
| 31 |
from dartlab.ai.runtime.run_modes import (
|
| 32 |
_run_agent,
|
|
|
|
| 33 |
_run_light_mode,
|
| 34 |
_run_stream,
|
| 35 |
)
|
|
@@ -99,6 +101,7 @@ def _build_included_evidence(included_tables: list[str]) -> list[dict[str, str]]
|
|
| 99 |
"BS_quarterly": "๋ถ๊ธฐ๋ณ ์ฌ๋ฌด์ํํ",
|
| 100 |
"_dart_openapi_filings": "์ต๊ทผ ๊ณต์ ๋ชฉ๋ก",
|
| 101 |
"_diff": "๊ณต์ ๋ณํ ๋น๊ต",
|
|
|
|
| 102 |
"_response_contract": "์๋ต ๊ณ์ฝ",
|
| 103 |
"_clarify": "ํ์ธ ์ง๋ฌธ",
|
| 104 |
}
|
|
@@ -147,6 +150,7 @@ def _context_label(module_name: str, explicit_label: str | None = None) -> str |
|
|
| 147 |
"segments": "์ฌ์
๋ถ๋ฌธ ๋ฐ์ดํฐ",
|
| 148 |
"_dart_openapi_filings": "์ต๊ทผ ๊ณต์ ๋ชฉ๋ก",
|
| 149 |
"_diff": "๊ณต์ ๋ณํ ๋น๊ต",
|
|
|
|
| 150 |
}.items()
|
| 151 |
if normalized == key or module_name == key
|
| 152 |
),
|
|
@@ -828,6 +832,17 @@ def _analyze_inner(
|
|
| 828 |
dataReadyBlock = f"๋ฐ์ดํฐ ๊ฐ์ฉ์ฑ\n{dataReadySummary}"
|
| 829 |
dynamic_part = f"{dynamic_part}\n\n{dataReadyBlock}" if dynamic_part else dataReadyBlock
|
| 830 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
if dialogue_policy:
|
| 832 |
dynamic_part = dynamic_part + "\n\n" + dialogue_policy if dynamic_part else dialogue_policy
|
| 833 |
|
|
@@ -885,12 +900,17 @@ def _analyze_inner(
|
|
| 885 |
# ๋ชจ๋ provider์์ Super Tool ๋ชจ๋ ๊ธฐ๋ณธ ํ์ฑํ โ 8๊ฐ ๋๊ตฌ๋ก ํตํฉ
|
| 886 |
_useSuperTools = True
|
| 887 |
effective_turns = max(max_turns, _estimate_max_turns(question, q_type or ""))
|
| 888 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
llm,
|
| 890 |
messages,
|
| 891 |
company,
|
| 892 |
question,
|
| 893 |
-
max_turns=
|
| 894 |
max_tools=max_tools,
|
| 895 |
q_type=q_type,
|
| 896 |
useSuperTools=_useSuperTools,
|
|
@@ -932,6 +952,24 @@ def _analyze_inner(
|
|
| 932 |
if response_meta.get("grade") or response_meta.get("has_conclusion"):
|
| 933 |
_done_payload["responseMeta"] = response_meta
|
| 934 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 935 |
# โโ 15. Meta ์
๋ฐ์ดํธ (includedModules, yearRange) โโ
|
| 936 |
if _included_tables:
|
| 937 |
includedEvidence = _build_included_evidence(_included_tables)
|
|
|
|
| 19 |
|
| 20 |
from __future__ import annotations
|
| 21 |
|
| 22 |
+
import sqlite3
|
| 23 |
from typing import Any, Generator
|
| 24 |
|
| 25 |
from dartlab.ai.runtime.events import AnalysisEvent
|
|
|
|
| 31 |
)
|
| 32 |
from dartlab.ai.runtime.run_modes import (
|
| 33 |
_run_agent,
|
| 34 |
+
_run_agent_autonomous,
|
| 35 |
_run_light_mode,
|
| 36 |
_run_stream,
|
| 37 |
)
|
|
|
|
| 101 |
"BS_quarterly": "๋ถ๊ธฐ๋ณ ์ฌ๋ฌด์ํํ",
|
| 102 |
"_dart_openapi_filings": "์ต๊ทผ ๊ณต์ ๋ชฉ๋ก",
|
| 103 |
"_diff": "๊ณต์ ๋ณํ ๋น๊ต",
|
| 104 |
+
"_changes": "๊ณต์ ๋ณํ ์์ฝ",
|
| 105 |
"_response_contract": "์๋ต ๊ณ์ฝ",
|
| 106 |
"_clarify": "ํ์ธ ์ง๋ฌธ",
|
| 107 |
}
|
|
|
|
| 150 |
"segments": "์ฌ์
๋ถ๋ฌธ ๋ฐ์ดํฐ",
|
| 151 |
"_dart_openapi_filings": "์ต๊ทผ ๊ณต์ ๋ชฉ๋ก",
|
| 152 |
"_diff": "๊ณต์ ๋ณํ ๋น๊ต",
|
| 153 |
+
"_changes": "๊ณต์ ๋ณํ ์์ฝ",
|
| 154 |
}.items()
|
| 155 |
if normalized == key or module_name == key
|
| 156 |
),
|
|
|
|
| 832 |
dataReadyBlock = f"๋ฐ์ดํฐ ๊ฐ์ฉ์ฑ\n{dataReadySummary}"
|
| 833 |
dynamic_part = f"{dynamic_part}\n\n{dataReadyBlock}" if dynamic_part else dataReadyBlock
|
| 834 |
|
| 835 |
+
# ์ด์ ๋ถ์ ๊ธฐ๋ก ์ฃผ์
(์ธ์
๊ฐ ๋ฉ๋ชจ๋ฆฌ)
|
| 836 |
+
if stock_id:
|
| 837 |
+
try:
|
| 838 |
+
from dartlab.ai.memory.store import getMemory
|
| 839 |
+
|
| 840 |
+
memoryContext = getMemory().toPromptContext(stock_id)
|
| 841 |
+
if memoryContext:
|
| 842 |
+
dynamic_part = f"{dynamic_part}\n\n{memoryContext}" if dynamic_part else memoryContext
|
| 843 |
+
except (ImportError, OSError, sqlite3.Error):
|
| 844 |
+
pass
|
| 845 |
+
|
| 846 |
if dialogue_policy:
|
| 847 |
dynamic_part = dynamic_part + "\n\n" + dialogue_policy if dynamic_part else dialogue_policy
|
| 848 |
|
|
|
|
| 900 |
# ๋ชจ๋ provider์์ Super Tool ๋ชจ๋ ๊ธฐ๋ณธ ํ์ฑํ โ 8๊ฐ ๋๊ตฌ๋ก ํตํฉ
|
| 901 |
_useSuperTools = True
|
| 902 |
effective_turns = max(max_turns, _estimate_max_turns(question, q_type or ""))
|
| 903 |
+
|
| 904 |
+
# report_mode โ ์์จ ํ์ ์์ด์ ํธ (Tier 2)
|
| 905 |
+
_agent_fn = _run_agent_autonomous if report_mode else _run_agent
|
| 906 |
+
_effective_max = max(effective_turns, 15) if report_mode else effective_turns
|
| 907 |
+
|
| 908 |
+
for _ev in _agent_fn(
|
| 909 |
llm,
|
| 910 |
messages,
|
| 911 |
company,
|
| 912 |
question,
|
| 913 |
+
max_turns=_effective_max,
|
| 914 |
max_tools=max_tools,
|
| 915 |
q_type=q_type,
|
| 916 |
useSuperTools=_useSuperTools,
|
|
|
|
| 952 |
if response_meta.get("grade") or response_meta.get("has_conclusion"):
|
| 953 |
_done_payload["responseMeta"] = response_meta
|
| 954 |
|
| 955 |
+
# โโ 14.5. ๋ถ์ ๋ฉ๋ชจ๋ฆฌ ์ ์ฅ โโ
|
| 956 |
+
if stock_id and _full_response_parts:
|
| 957 |
+
try:
|
| 958 |
+
from dartlab.ai.memory.store import getMemory
|
| 959 |
+
from dartlab.ai.memory.summarizer import extractGrade, summarizeResponse
|
| 960 |
+
|
| 961 |
+
_fullText = "".join(_full_response_parts)
|
| 962 |
+
_mem = getMemory()
|
| 963 |
+
_mem.saveAnalysis(
|
| 964 |
+
stockCode=stock_id,
|
| 965 |
+
question=question[:200],
|
| 966 |
+
questionType=q_type or "",
|
| 967 |
+
resultSummary=summarizeResponse(_fullText),
|
| 968 |
+
grade=extractGrade(_fullText),
|
| 969 |
+
)
|
| 970 |
+
except (ImportError, OSError, sqlite3.Error):
|
| 971 |
+
pass
|
| 972 |
+
|
| 973 |
# โโ 15. Meta ์
๋ฐ์ดํธ (includedModules, yearRange) โโ
|
| 974 |
if _included_tables:
|
| 975 |
includedEvidence = _build_included_evidence(_included_tables)
|
src/dartlab/ai/runtime/run_modes.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
"""AI ๋ถ์ ์คํ ๋ชจ๋ โ light / guided_json / stream / agent.
|
| 2 |
|
| 3 |
-
core.py์ _analyze_inner()์์ ๋์คํจ์นํ๋
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
|
@@ -249,3 +249,88 @@ def _run_agent(
|
|
| 249 |
yield AnalysisEvent("chart", chart_events.pop(0))
|
| 250 |
while ui_events:
|
| 251 |
yield ui_events.pop(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AI ๋ถ์ ์คํ ๋ชจ๋ โ light / guided_json / stream / agent / autonomous.
|
| 2 |
|
| 3 |
+
core.py์ _analyze_inner()์์ ๋์คํจ์นํ๋ 5๊ฐ์ง ์คํ ๊ฒฝ๋ก.
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
|
|
|
| 249 |
yield AnalysisEvent("chart", chart_events.pop(0))
|
| 250 |
while ui_events:
|
| 251 |
yield ui_events.pop(0)
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
# โโ Autonomous agent mode (Tier 2) โโโโโโโโโโโโโโโโโโโโโโ
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def _run_agent_autonomous(
    llm,
    messages: list[dict],
    company: Any,
    question: str,
    *,
    max_turns: int = 15,
    max_tools: int | None = None,
    q_type: str | None = None,
    useSuperTools: bool = True,
    _full_response_parts: list[str],
) -> Generator[AnalysisEvent, None, None]:
    """Run the autonomous exploration agent (used for report_mode) as an event stream.

    Args:
        llm: Provider object handed to ``agentLoopAutonomous``.
        messages: Conversation so far. The agent system addition is appended
            in place to ``messages[0]["content"]``.
        company: Company object used to build the tool runtime.
        question: Accepted for signature parity with the other run modes;
            not read by this function.
        max_turns: Forwarded to the agent loop as ``maxTurns``.
        max_tools: Forwarded to the agent loop as ``maxTools``.
        q_type: Forwarded to the agent loop as ``questionType``.
        useSuperTools: Forwarded to ``build_tool_runtime``.
        _full_response_parts: Output list; every streamed text chunk is
            appended to it.

    Yields:
        ``tool_call`` / ``tool_result`` / ``chart`` / UI-action events queued
        by the tool callbacks, followed by a ``chunk`` event per text chunk.
    """
    from dartlab.ai.runtime.agent import agentLoopAutonomous, build_agent_system_addition
    from dartlab.ai.tools.registry import build_tool_runtime

    runtime = build_tool_runtime(company, name="core-autonomous", useSuperTools=useSuperTools)

    system_addition = build_agent_system_addition(runtime)
    messages[0]["content"] += system_addition

    tool_calls_log: list[dict] = []
    tool_results_log: list[dict] = []
    chart_events: list[dict] = []
    ui_events: list[AnalysisEvent] = []

    def _on_tool_call(name: str, arguments: dict) -> None:
        tool_calls_log.append({"name": name, "arguments": arguments})

    def _on_tool_result(name: str, result: str) -> None:
        tool_results_log.append({"name": name, "result": result})
        # Decode once. Anything that is not a JSON object carries neither
        # charts nor a UI action, so it is ignored instead of raising
        # AttributeError on ``.get`` and aborting the whole agent run.
        try:
            parsed = json.loads(result)
        except (json.JSONDecodeError, TypeError):
            return
        if not isinstance(parsed, dict):
            return
        if name == "chart":
            charts = parsed.get("charts")
            if charts:
                chart_events.append({"charts": charts})
        if parsed.get("action"):
            ui_events.append(AnalysisEvent(EventKind.UI_ACTION, parsed))

    def _drain_pending() -> Generator[AnalysisEvent, None, None]:
        """Emit queued callback events in a fixed order, emptying the queues."""
        while tool_calls_log:
            yield AnalysisEvent("tool_call", tool_calls_log.pop(0))
        while tool_results_log:
            yield AnalysisEvent("tool_result", tool_results_log.pop(0))
        while chart_events:
            yield AnalysisEvent("chart", chart_events.pop(0))
        while ui_events:
            yield ui_events.pop(0)

    for chunk in agentLoopAutonomous(
        llm,
        messages,
        company,
        maxTurns=max_turns,
        maxTools=max_tools,
        runtime=runtime,
        onToolCall=_on_tool_call,
        onToolResult=_on_tool_result,
        questionType=q_type,
    ):
        # Tool activity recorded since the previous chunk is reported first.
        yield from _drain_pending()

        _full_response_parts.append(chunk)
        yield AnalysisEvent("chunk", {"text": chunk})

    # Events queued after the last chunk (or when no chunk was produced).
    yield from _drain_pending()
|
src/dartlab/ai/runtime/scratchpad.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋๊ตฌ ๊ฒฐ๊ณผ ๋์ /์ ๋ฆฌ ์์ง โ dexter scratchpad ํจํด.
|
| 2 |
+
|
| 3 |
+
์์ด์ ํธ ๋ฃจํ์์ ๋๊ตฌ ํธ์ถ ๊ฒฐ๊ณผ๋ฅผ ๊ตฌ์กฐ์ ์ผ๋ก ๊ด๋ฆฌํ๋ค:
|
| 4 |
+
- ๋๊ตฌ๋ณ ํธ์ถ ํ์ ์ถ์ + ์ค๋ณต ๋ฐฉ์ง
|
| 5 |
+
- ํ ํฐ ์์ฐ ์ด๊ณผ ์ ์ค๋๋ ๊ฒฐ๊ณผ ์์ถ
|
| 6 |
+
- LLM์ ์ ๋ฌํ ์ ๋ฆฌ๋ ์ปจํ
์คํธ ์์ฑ
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
from typing import Any
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class _Entry:
    """Result of a single tool call as stored on the scratchpad."""

    toolName: str  # name of the tool that was called
    args: dict[str, Any]  # arguments the tool was called with
    result: str  # stored result text; pruning may replace it with a one-line summary
    tokenEstimate: int  # estimated token count of ``result``
    order: int  # insertion sequence number assigned by the scratchpad
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
|
| 27 |
+
class Scratchpad:
|
| 28 |
+
"""์์ด์ ํธ ๋ฃจํ ๋๊ตฌ ๊ฒฐ๊ณผ ๋์ /์ ๋ฆฌ."""
|
| 29 |
+
|
| 30 |
+
entries: list[_Entry] = field(default_factory=list)
|
| 31 |
+
callCounts: dict[str, int] = field(default_factory=dict)
|
| 32 |
+
_order: int = field(default=0, repr=False)
|
| 33 |
+
tokenBudget: int = 8000
|
| 34 |
+
|
| 35 |
+
# โโ ํต์ฌ API โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 36 |
+
|
| 37 |
+
def addEntry(self, toolName: str, args: dict[str, Any], result: str) -> None:
    """Store one tool result, then enforce the token budget.

    The result is first passed through ``pruneToolResult``; the stored entry
    holds that pruned text together with its token estimate and the next
    sequence number. The per-tool call counter is incremented as well.
    """
    from dartlab.ai.context.pruning import pruneToolResult

    trimmed = pruneToolResult(toolName, result)
    estimated = _estimateTokens(trimmed)
    sequence = self._order + 1
    self._order = sequence
    record = _Entry(
        toolName=toolName,
        args=args,
        result=trimmed,
        tokenEstimate=estimated,
        order=sequence,
    )
    self.entries.append(record)
    previousCalls = self.callCounts.get(toolName, 0)
    self.callCounts[toolName] = previousCalls + 1
    self.pruneIfNeeded()
|
| 47 |
+
|
| 48 |
+
def isDuplicateExceeded(self, toolName: str, maxCalls: int = 3) -> bool:
|
| 49 |
+
"""๊ฐ์ ๋๊ตฌ๊ฐ maxCalls ์ด์ ํธ์ถ๋๋์ง."""
|
| 50 |
+
return self.callCounts.get(toolName, 0) >= maxCalls
|
| 51 |
+
|
| 52 |
+
def pruneIfNeeded(self) -> None:
    """Shrink stored results until the scratchpad fits ``tokenBudget``.

    While the estimated total exceeds the budget and more than one entry is
    stored, the oldest entry is collapsed to a one-line summary; if the total
    is still over budget afterwards, that entry is removed. The loop stops
    when a single entry remains, so the newest entry is never altered.

    An oldest entry that is already a summary is removed directly rather
    than summarised again: re-summarising would prepend the summary prefix a
    second time and cut the earlier summary down to its first 100 characters.
    """
    while self._totalTokens() > self.tokenBudget and len(self.entries) > 1:
        oldest = self.entries[0]

        # Summarising an empty result returns just the prefix that every
        # summary line starts with, which identifies summarised entries
        # without duplicating the literal here.
        summaryPrefix = _summarizeLine(oldest.toolName, "")
        if summaryPrefix and oldest.result.startswith(summaryPrefix):
            self.entries.pop(0)
            continue

        summary = _summarizeLine(oldest.toolName, oldest.result)
        oldest.result = summary
        oldest.tokenEstimate = _estimateTokens(summary)

        # Still over budget even after compressing: drop the entry.
        if self._totalTokens() > self.tokenBudget:
            self.entries.pop(0)
|
| 63 |
+
|
| 64 |
+
def toContext(self) -> str:
|
| 65 |
+
"""๋์ ๊ฒฐ๊ณผ๋ฅผ ๋งํฌ๋ค์ด์ผ๋ก ๋ณํ."""
|
| 66 |
+
if not self.entries:
|
| 67 |
+
return ""
|
| 68 |
+
parts: list[str] = []
|
| 69 |
+
for e in self.entries:
|
| 70 |
+
argsStr = ", ".join(f"{k}={v}" for k, v in e.args.items()) if e.args else ""
|
| 71 |
+
parts.append(f"### {e.toolName}({argsStr})\n{e.result}")
|
| 72 |
+
return "\n\n".join(parts)
|
| 73 |
+
|
| 74 |
+
def getUsageSummary(self) -> str:
|
| 75 |
+
"""ํ์ฌ ๋๊ตฌ ํธ์ถ ํํฉ ํ
์คํธ."""
|
| 76 |
+
if not self.callCounts:
|
| 77 |
+
return ""
|
| 78 |
+
lines = [f"- {name}: {count}ํ" for name, count in self.callCounts.items()]
|
| 79 |
+
total = self._totalTokens()
|
| 80 |
+
lines.append(f"- ์ปจํ
์คํธ: ~{total} ํ ํฐ / {self.tokenBudget} ์์ฐ")
|
| 81 |
+
return "**๋๊ตฌ ์ฌ์ฉ ํํฉ:**\n" + "\n".join(lines)
|
| 82 |
+
|
| 83 |
+
def getDuplicateWarning(self, toolName: str) -> str | None:
|
| 84 |
+
"""์ค๋ณต ์ด๊ณผ ์ LLM์ ์ ๋ฌํ ๊ฒฝ๊ณ ๋ฉ์์ง."""
|
| 85 |
+
if not self.isDuplicateExceeded(toolName):
|
| 86 |
+
return None
|
| 87 |
+
count = self.callCounts.get(toolName, 0)
|
| 88 |
+
return (
|
| 89 |
+
f"โ ๏ธ {toolName}์ ์ด๋ฏธ {count}ํ ํธ์ถํ์ต๋๋ค. "
|
| 90 |
+
f"๊ฐ์ ๋๊ตฌ๋ฅผ ๋ฐ๋ณต ํธ์ถํ์ง ๋ง๊ณ , ์์ง๋ ๋ฐ์ดํฐ๋ก ๋ต๋ณ์ ์ข
ํฉํ์ธ์."
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
# โโ ๋ด๋ถ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 94 |
+
|
| 95 |
+
def _totalTokens(self) -> int:
|
| 96 |
+
return sum(e.tokenEstimate for e in self.entries)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _estimateTokens(text: str) -> int:
|
| 100 |
+
"""๊ฐ์ด ํ ํฐ ์ถ์ โ ํ๊ธ 2์=1ํ ํฐ, ์๋ฌธ 4์=1ํ ํฐ ๊ทผ์ฌ."""
|
| 101 |
+
if not text:
|
| 102 |
+
return 0
|
| 103 |
+
korean = sum(1 for c in text if "\uac00" <= c <= "\ud7a3")
|
| 104 |
+
other = len(text) - korean
|
| 105 |
+
return korean // 2 + other // 4 + 1
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _summarizeLine(toolName: str, result: str) -> str:
|
| 109 |
+
"""๋๊ตฌ ๊ฒฐ๊ณผ๋ฅผ 1์ค ์์ฝ์ผ๋ก ์์ถ."""
|
| 110 |
+
# ์ฒซ ์ค ๋๋ ์ฒซ 100์ + ์ค ์ ์ ๋ณด
|
| 111 |
+
lines = result.strip().split("\n")
|
| 112 |
+
firstLine = lines[0][:100] if lines else ""
|
| 113 |
+
if len(lines) > 1:
|
| 114 |
+
return f"[์์ฝ] {firstLine}... ({len(lines)}์ค, {toolName})"
|
| 115 |
+
return f"[์์ฝ] {firstLine}"
|
src/dartlab/ai/skills/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋ถ์ ์คํฌ โ ํ๋กฌํํธ ๊ธฐ๋ฐ ์ํฌํ๋ก์ฐ ๊ฐ์ด๋.
|
| 2 |
+
|
| 3 |
+
๋๊ตฌ๋ฅผ ์ง์ ํ์ง ์๊ณ ๋ถ์ ๋ชฉํ๋ง ์ ์ธํ๋ค.
|
| 4 |
+
LLM์ด ํ์ฌ ๊ฐ์ฉํ ๋๊ตฌ ์ค์์ ์์จ ์ ํ.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from dartlab.ai.skills.registry import Skill, matchSkill
|
| 8 |
+
|
| 9 |
+
__all__ = ["Skill", "matchSkill"]
|
src/dartlab/ai/skills/catalog.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋ถ์ ์์ญ๋ณ ์คํฌ ์นดํ๋ก๊ทธ.
|
| 2 |
+
|
| 3 |
+
๋๊ตฌ๋ฅผ ์ง์ ํ์ง ์๋๋ค โ ๋ถ์ ๋ชฉํ๋ง ์ ์ธ.
|
| 4 |
+
8๋ ์์ญ์ด ์์ ํ๋๋ฉด์ ์์ฐ์ค๋ฝ๊ฒ ํจ๊ณผ๊ฐ ํฅ์๋๋ค.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from dartlab.ai.skills.registry import Skill
|
| 10 |
+
|
| 11 |
+
SKILLS: tuple[Skill, ...] = (
|
| 12 |
+
Skill(
|
| 13 |
+
id="profitability",
|
| 14 |
+
name="์์ต์ฑ ์ฌ์ธต ๋ถ์",
|
| 15 |
+
triggerKeywords=("์์ต์ฑ", "์ด์ต๋ฅ ", "๋ง์ง", "ROE", "ROA", "์์
์ด์ต๋ฅ "),
|
| 16 |
+
analysisGoals=(
|
| 17 |
+
"ROE๋ฅผ DuPont ๋ถํดํ์ฌ ์์ต์ฑ/ํจ์จ์ฑ/๋ ๋ฒ๋ฆฌ์ง ๋์ธ ์๋ณ",
|
| 18 |
+
"์์
์ด์ต๋ฅ ๊ณผ ์๊ฐ์จ ์ถ์ธ์์ ๋น์ฉ ๊ตฌ์กฐ ๋ณํ ํ์
",
|
| 19 |
+
"์์
CF/์์ด์ต ๋น์จ๋ก ์ด์ต์ ์ง ํ๋จ",
|
| 20 |
+
"๋ถ๋ฌธ๋ณ ์์ต์ฑ ์ฐจ์ด๊ฐ ์์ผ๋ฉด ์ธ๊ทธ๋จผํธ ๋ถํด",
|
| 21 |
+
),
|
| 22 |
+
synthesisGuide="DuPont ๋ถํด โ ์ด์ต์ ์ง โ ๋น์ฉ ๋์ธ โ ์ธ๊ณผ ๊ด๊ณ ์์ ",
|
| 23 |
+
checkpoints=(
|
| 24 |
+
"DuPont 3์์ ๋ถํด๊ฐ ์๋๊ฐ?",
|
| 25 |
+
"CF/NI ๋น์จ์ ์ธ์ฉํ๋๊ฐ?",
|
| 26 |
+
"๋น์ฉ ๊ตฌ์กฐ ๋ณํ์ ์์ธ์ ์ค๋ช
ํ๋๊ฐ?",
|
| 27 |
+
),
|
| 28 |
+
),
|
| 29 |
+
Skill(
|
| 30 |
+
id="health",
|
| 31 |
+
name="์ฌ๋ฌด ๊ฑด์ ์ฑ ๋ถ์",
|
| 32 |
+
triggerKeywords=("๊ฑด์ ์ฑ", "๋ถ์ฑ", "์ ๋์ฑ", "์์ ์ฑ", "์ฌ๋ฌด๊ตฌ์กฐ", "๋ถ์ฑ๋น์จ"),
|
| 33 |
+
analysisGoals=(
|
| 34 |
+
"๋ถ์ฑ๋น์จ๊ณผ ์ ๋๋น์จ ์ถ์ธ๋ก ๊ตฌ์กฐ์ ์์ ์ฑ ํ๋จ",
|
| 35 |
+
"์ด์๋ณด์๋ฐฐ์จ๊ณผ ์ฐจ์
๊ธ ๋ง๊ธฐ ๊ตฌ์กฐ ํ์ธ",
|
| 36 |
+
"์ด์ ์๋ณธ ์ฌ์ดํด(๋งค์ถ์ฑ๊ถ+์ฌ๊ณ -๋งค์
์ฑ๋ฌด) ์ถ์ด ๋ถ์",
|
| 37 |
+
"ํ๊ธ์ฑ ์์ฐ ๋๋น ๋จ๊ธฐ ์๋ฌด ์ปค๋ฒ๋ฆฌ์ง ํ์ธ",
|
| 38 |
+
),
|
| 39 |
+
synthesisGuide="๋ ๋ฒ๋ฆฌ์ง ๊ตฌ์กฐ โ ์ ๋์ฑ ๊ณ์ธต โ ๋ถ์ฑ ๋ง๊ธฐ โ ์ข
ํฉ ๊ฑด์ ์ฑ ํ๋จ",
|
| 40 |
+
checkpoints=(
|
| 41 |
+
"์ ๋๋น์จ๊ณผ ๋ถ์ฑ๋น์จ ์์น๋ฅผ ์ธ์ฉํ๋๊ฐ?",
|
| 42 |
+
"์ด์๋ณด์๋ฐฐ์จ์ ํ์ธํ๋๊ฐ?",
|
| 43 |
+
"๋จ๊ธฐ ์ ๋์ฑ ์ํ์ ํ๊ฐํ๋๊ฐ?",
|
| 44 |
+
),
|
| 45 |
+
),
|
| 46 |
+
Skill(
|
| 47 |
+
id="valuation",
|
| 48 |
+
name="๋ฐธ๋ฅ์์ด์
๋ถ์",
|
| 49 |
+
triggerKeywords=("๋ฐธ๋ฅ์์ด์
", "์ ์ ๊ฐ์น", "๋ชฉํ๊ฐ", "์ ํ๊ฐ", "๊ณ ํ๊ฐ", "PER", "PBR", "DCF"),
|
| 50 |
+
analysisGoals=(
|
| 51 |
+
"ํต์ฌ ๋ฉํฐํ(PER, PBR, EV/EBITDA) ์ฐ์ถ ๋ฐ ์
์ข
๋น๊ต",
|
| 52 |
+
"์ด์ต ์ฑ์ฅ๋ฅ ๊ณผ ์ง์๊ฐ๋ฅ์ฑ์ ๊ทผ๊ฑฐ๋ก ์ ์ ๋ฉํฐํ ๋ฒ์ ์ถ์ ",
|
| 53 |
+
"๊ฐ๋ฅํ๋ฉด DCF ๊ด์ ์์ ๋ด์ฌ๊ฐ์น ๋ฒ์ ์ ์",
|
| 54 |
+
"์์ ๋ง์ง(ํ์ฌ๊ฐ vs ์ ์ ๊ฐ์น ๋ฒ์) ํ๋จ",
|
| 55 |
+
),
|
| 56 |
+
synthesisGuide="๋ฉํฐํ ๋น๊ต โ ์ฑ์ฅ๋ฅ ๊ทผ๊ฑฐ โ ์ ์ ๊ฐ์น ๋ฒ์ โ ์์ ๋ง์ง ํ๋จ",
|
| 57 |
+
checkpoints=(
|
| 58 |
+
"PER/PBR ์์น์ ์
์ข
๋น๊ต๊ฐ ์๋๊ฐ?",
|
| 59 |
+
"์ฑ์ฅ๋ฅ ๊ทผ๊ฑฐ๋ฅผ ์ ์ํ๋๊ฐ?",
|
| 60 |
+
"์ ์ ๊ฐ์น ๋ฒ์๋ฅผ ์ ์ํ๋๊ฐ? (๋จ์ผ ๋ชฉํ๊ฐ ์๋ ๋ฒ์)",
|
| 61 |
+
),
|
| 62 |
+
),
|
| 63 |
+
Skill(
|
| 64 |
+
id="risk",
|
| 65 |
+
name="๋ฆฌ์คํฌ ๋ถ์",
|
| 66 |
+
triggerKeywords=("๋ฆฌ์คํฌ", "์ํ", "์๊ธฐ", "๋ถํ์ค์ฑ", "์ ์์ ํธ"),
|
| 67 |
+
analysisGoals=(
|
| 68 |
+
"์ฌ๋ฌด ๋ฆฌ์คํฌ: ์ ๋์ฑ, ๋ ๋ฒ๋ฆฌ์ง, ์ด์๋ณด์ ์ญ๋",
|
| 69 |
+
"์ฌ์
๋ฆฌ์คํฌ: ๋งค์ถ์ฒ ์ง์ค, ๊ณต๊ธ๋ง ์์กด, ๊ท์ ๋ณํ",
|
| 70 |
+
"ํ๊ณ ๋ฆฌ์คํฌ: ๊ฐ์ฌ์๊ฒฌ ๋ณํ, ํน์๊ด๊ณ์ ๊ฑฐ๋, ํ๊ณ์ ์ฑ
๋ณ๊ฒฝ",
|
| 71 |
+
"๊ณต์์์ ๊ฒฝ์์ง์ด ์ง์ ์ธ๊ธํ ๋ฆฌ์คํฌ ์์ธ ํ์ธ",
|
| 72 |
+
),
|
| 73 |
+
synthesisGuide="์ฌ๋ฌด ๋ฆฌ์คํฌ โ ์ฌ์
๋ฆฌ์คํฌ โ ํ๊ณ ๋ฆฌ์คํฌ โ ์ข
ํฉ ์ํ๋ ํ๋จ",
|
| 74 |
+
checkpoints=(
|
| 75 |
+
"์ ์ ์ ํธ ์ฒดํฌ๋ฆฌ์คํธ๋ฅผ ์ ์ฉํ๋๊ฐ?",
|
| 76 |
+
"๊ณต์ ์๋ฌธ์์ ๋ฆฌ์คํฌ ๊ด๋ จ ์์ ์ ์ธ์ฉํ๋๊ฐ?",
|
| 77 |
+
),
|
| 78 |
+
),
|
| 79 |
+
Skill(
|
| 80 |
+
id="strategy",
|
| 81 |
+
name="์ฌ์
์ ๋ต ๋ถ์",
|
| 82 |
+
triggerKeywords=("์ฌ์
", "์ ๋ต", "๊ฒฝ์์ฐ์", "๋น์ฆ๋์ค๋ชจ๋ธ", "์ฌ์
๊ตฌ์กฐ", "์ฌ์
๊ฐ์"),
|
| 83 |
+
analysisGoals=(
|
| 84 |
+
"์ฌ์
๊ตฌ์กฐ: ๋ถ๋ฌธ๋ณ ๋งค์ถ ๋น์ค๊ณผ ์์ต์ฑ ์ฐจ์ด",
|
| 85 |
+
"๊ฒฝ์ ์ฐ์: R&D ํฌ์ ๊ฐ๋, ๋ง์ง ํ๋ฆฌ๋ฏธ์, ๊ณ ๊ฐ ์ง์ค๋",
|
| 86 |
+
"์ฑ์ฅ ์ ๋ต: ์ ๊ธฐ์ ์ฑ์ฅ vs ์ธ์, CAPEX ๋ฐฉํฅ",
|
| 87 |
+
"๊ณต์ ์๋ฌธ์์ ๊ฒฝ์์ง์ ์ ๋ต ์์ ํ์ธ",
|
| 88 |
+
),
|
| 89 |
+
synthesisGuide="์ฌ์
๊ตฌ์กฐ ๋ถํด โ ๊ฒฝ์ ์ฐ์ ์๋ณ โ ์ฑ์ฅ ์ ๋ต ํ๊ฐ โ ์ง์๊ฐ๋ฅ์ฑ ํ๋จ",
|
| 90 |
+
checkpoints=(
|
| 91 |
+
"๋ถ๋ฌธ๋ณ ๋งค์ถ/์ด์ต ๋น์ค์ ๋ถํดํ๋๊ฐ?",
|
| 92 |
+
"R&D/CAPEX ํฌ์ ๋ฐฉํฅ์ ํ์ธํ๋๊ฐ?",
|
| 93 |
+
),
|
| 94 |
+
),
|
| 95 |
+
Skill(
|
| 96 |
+
id="accounting",
|
| 97 |
+
name="ํ๊ณ ํ์ง ๋ถ์",
|
| 98 |
+
triggerKeywords=("ํ๊ณ", "๊ฐ์ฌ", "๋ถ์", "์ด์ต์์ง", "๋ฐ์์ฃผ์", "ํ๊ณ์ ์ฑ
"),
|
| 99 |
+
analysisGoals=(
|
| 100 |
+
"Accrual Ratio ๊ณ์ฐ: (์์ด์ต-์์
CF)/ํ๊ท ์์ฐ โ 10% ์ด๊ณผ ์ ์์ฌ",
|
| 101 |
+
"๊ฐ์ฌ์๊ฒฌ ๋ณํ์ ๊ฐ์ฌ์ธ ๊ต์ฒด ์ด๋ ฅ ํ์ธ",
|
| 102 |
+
"ํ๊ณ์ ์ฑ
๋ณ๊ฒฝ(์์ต์ธ์, ์๋ณธํ, ๊ฐ๊ฐ์๊ฐ) ์ํฅ ํ์
",
|
| 103 |
+
"๋งค์ถ์ฑ๊ถ/์ฌ๊ณ ์ฆ๊ฐ์จ๊ณผ ๋งค์ถ/์๊ฐ ์ฆ๊ฐ์จ ๋น๊ต",
|
| 104 |
+
),
|
| 105 |
+
synthesisGuide="Accrual Ratio โ ๊ฐ์ฌ ์ด๋ ฅ โ ํ๊ณ์ ์ฑ
๋ณ๊ฒฝ โ ์ด์ต์ ์ง ์ข
ํฉ",
|
| 106 |
+
checkpoints=(
|
| 107 |
+
"CF/NI ๋น์จ ๋๋ Accrual Ratio๋ฅผ ๊ณ์ฐํ๋๊ฐ?",
|
| 108 |
+
"๊ฐ์ฌ์๊ฒฌ์ ํ์ธํ๋๊ฐ?",
|
| 109 |
+
),
|
| 110 |
+
),
|
| 111 |
+
Skill(
|
| 112 |
+
id="dividend",
|
| 113 |
+
name="๋ฐฐ๋น ๋ถ์",
|
| 114 |
+
triggerKeywords=("๋ฐฐ๋น", "๋ฐฐ๋น๊ธ", "๋ฐฐ๋น๋ฅ ", "๋ฐฐ๋น์ฑํฅ", "์ฃผ์ฃผํ์"),
|
| 115 |
+
analysisGoals=(
|
| 116 |
+
"๋ฐฐ๋น ์ถ์ด: ๋ฐฐ๋น๊ธ, ๋ฐฐ๋น์์ต๋ฅ , ๋ฐฐ๋น์ฑํฅ 3~5๋
์๊ณ์ด",
|
| 117 |
+
"๋ฐฐ๋น ์ง์๊ฐ๋ฅ์ฑ: FCF ๋๋น ๋ฐฐ๋น๊ธ, ์ด์ต ์์ ์ฑ",
|
| 118 |
+
"์ฃผ์ฃผํ์ ์ ์ฑ
: ์์ฌ์ฃผ ๋งค์
, ์๊ฐ ์ด๋ ฅ ํ์ธ",
|
| 119 |
+
"๋์ข
์
์ข
๋ฐฐ๋น ์์ค ๋น๊ต (๊ฐ๋ฅ ์)",
|
| 120 |
+
),
|
| 121 |
+
synthesisGuide="๋ฐฐ๋น ์ถ์ด โ ์ง์๊ฐ๋ฅ์ฑ(FCF) โ ์ฃผ์ฃผํ์ ์ ์ฑ
โ ๋งค๋ ฅ๋ ํ๋จ",
|
| 122 |
+
checkpoints=(
|
| 123 |
+
"๋ฐฐ๋น์ฑํฅ๊ณผ ๋ฐฐ๋น์์ต๋ฅ ์์น๋ฅผ ์ธ์ฉํ๋๊ฐ?",
|
| 124 |
+
"FCF ๋๋น ๋ฐฐ๋น ์ปค๋ฒ๋ฆฌ์ง๋ฅผ ํ์ธํ๋๊ฐ?",
|
| 125 |
+
),
|
| 126 |
+
),
|
| 127 |
+
Skill(
|
| 128 |
+
id="comprehensive",
|
| 129 |
+
name="์ข
ํฉ ๋ถ์",
|
| 130 |
+
triggerKeywords=("์ข
ํฉ", "์ ๋ฐ", "์ ์ฒด", "์ดํ", "๋ถ์ํด์ค", "์ด๋"),
|
| 131 |
+
analysisGoals=(
|
| 132 |
+
"์ฌ์
๊ตฌ์กฐ์ ๊ฒฝ์ ํฌ์ง์
๋ ํ์
",
|
| 133 |
+
"ํต์ฌ ์ฌ๋ฌด ์งํ(์์ต์ฑ, ๊ฑด์ ์ฑ, ์ฑ์ฅ์ฑ) 3~5๋
์ถ์ธ",
|
| 134 |
+
"์ด์ต์ ์ง๊ณผ ํ๊ธํ๋ฆ ํ๋กํ์ผ",
|
| 135 |
+
"์ ์ ์ ํธ ์ฒดํฌ ๋ฐ ๋ฆฌ์คํฌ ์์ธ ์๋ณ",
|
| 136 |
+
"๊ฐ์ /์ฝ์ ์ ๋ฆฌ์ Bull/Bear ๋
ผ๊ฑฐ",
|
| 137 |
+
),
|
| 138 |
+
synthesisGuide="์ฌ์
๊ตฌ์กฐ โ ์ฌ๋ฌด ์ถ์ธ โ ์ด์ต์ ์ง โ ๋ฆฌ์คํฌ โ ๊ฐ์ /์ฝ์ โ ์ข
ํฉ ํ๋จ",
|
| 139 |
+
checkpoints=(
|
| 140 |
+
"์ต์ 3๊ฐ ์ด์์ ์ฌ๋ฌด ๋น์จ์ ์ธ์ฉํ๋๊ฐ?",
|
| 141 |
+
"๊ฐ์ ๊ณผ ์ฝ์ ์ ๊ท ํ ์๊ฒ ์ ์ํ๋๊ฐ?",
|
| 142 |
+
"Bull/Bear ๋
ผ๊ฑฐ๋ฅผ ์ ์ํ๋๊ฐ?",
|
| 143 |
+
),
|
| 144 |
+
),
|
| 145 |
+
)
|
src/dartlab/ai/skills/registry.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์คํฌ ๋ ์ง์คํธ๋ฆฌ โ ๋ถ์ ๋ชฉํ ๊ธฐ๋ฐ ์ํฌํ๋ก์ฐ ๋งค์นญ.
|
| 2 |
+
|
| 3 |
+
Skill์ ๋๊ตฌ๋ฅผ ์ง์ ํ์ง ์๋๋ค.
|
| 4 |
+
๋ถ์ ๋ชฉํ(analysisGoals)์ ์ข
ํฉ ๊ฐ์ด๋(synthesisGuide)๋ง ์ ์ธํ๊ณ ,
|
| 5 |
+
LLM์ด ํ์ฌ ๊ฐ์ฉํ ๋๊ตฌ ์ค์์ ์์จ ์ ํํ๋ค.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass(frozen=True)
class Skill:
    """Declarative definition of one analysis workflow.

    A skill only states what an analysis should achieve and how the findings
    should be synthesised; it does not name any tools.
    """

    id: str  # stable identifier, e.g. "comprehensive"
    name: str  # display name used in the prompt heading
    triggerKeywords: tuple[str, ...]  # keywords that select this skill
    analysisGoals: tuple[str, ...]  # goals, rendered as a numbered list
    synthesisGuide: str  # one-line synthesis frame
    # Optional self-verification questions. A tuple is immutable, so a plain
    # default is safe and no default_factory is needed.
    checkpoints: tuple[str, ...] = ()

    def toPrompt(self) -> str:
        """Render the skill as a Markdown guide for the system prompt.

        Returns:
            A heading with the skill name, the numbered analysis goals, the
            synthesis frame and, when checkpoints exist, a self-check list.
        """
        goals = "\n".join(f" {i}. {goal}" for i, goal in enumerate(self.analysisGoals, 1))
        checks = ""
        if self.checkpoints:
            checks = "\n**자체 검증:**\n" + "\n".join(f" - {c}" for c in self.checkpoints)
        return (
            f"## 분석 스킬: {self.name}\n\n"
            f"**분석 목표:**\n{goals}\n\n"
            f"**종합 프레임:** {self.synthesisGuide}{checks}"
        )
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def matchSkill(
    question: str,
    questionType: str | None = None,
) -> Skill | None:
    """Return the skill that best fits a question, or ``None``.

    Args:
        question: Free-text user question.
        questionType: Optional pre-classified type. When given, the first
            skill that lists it among its trigger keywords wins outright.

    Returns:
        The matched skill. Without a ``questionType`` hit, skills are scored
        by how many of their trigger keywords occur in ``question``; the
        highest score wins and the earliest skill wins ties. ``None`` when
        the question is empty or no keyword occurs in it.
    """
    from dartlab.ai.skills.catalog import SKILLS

    # Pass 1: direct match on questionType.
    if questionType:
        for skill in SKILLS:
            if questionType in skill.triggerKeywords:
                return skill

    # Pass 2: keyword match against the question text.
    if not question:
        return None

    bestSkill: Skill | None = None
    bestScore = 0
    for skill in SKILLS:
        score = sum(1 for kw in skill.triggerKeywords if kw in question)
        if score > bestScore:
            bestScore = score
            bestSkill = skill

    # bestSkill is only ever assigned on a positive score, so it is already
    # None when nothing matched.
    return bestSkill
|
src/dartlab/ai/tools/defaults/helpers.py
CHANGED
|
@@ -21,8 +21,11 @@ def df_to_md(df: pl.DataFrame, max_rows: int = 15, max_chars: int = 0, market: s
|
|
| 21 |
|
| 22 |
|
| 23 |
def json_to_text(value: Any, max_chars: int = 4000) -> str:
|
| 24 |
-
"""dict/list/json ์ง๋ ฌํ."""
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
| 26 |
if len(text) <= max_chars:
|
| 27 |
return text
|
| 28 |
return text[:max_chars] + "\n... (truncated)"
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def json_to_text(value: Any, max_chars: int = 4000) -> str:
    """Serialise a dict/list/JSON-like value to text after pruning.

    The value is first passed through ``_pruneValue`` with ``_STRIP_FIELDS``,
    then dumped as indented JSON with non-ASCII characters kept and
    non-serialisable objects converted via ``str``.

    Args:
        value: Object to serialise.
        max_chars: Maximum length of the JSON text before truncation.

    Returns:
        The JSON text, cut to ``max_chars`` characters and suffixed with a
        truncation marker when it is longer than that.
    """
    from dartlab.ai.context.pruning import _STRIP_FIELDS, _pruneValue

    pruned = _pruneValue(value, _STRIP_FIELDS, depth=0)
    text = json.dumps(pruned, ensure_ascii=False, indent=2, default=str)
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n... (truncated)"
|
src/dartlab/cli/commands/chat.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""`dartlab chat` command -- ์ธํฐ๋ํฐ๋ธ ํฐ๋ฏธ๋ REPL."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import time
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
from dartlab.cli.context import PROVIDERS
|
| 11 |
+
from dartlab.cli.services.errors import CLIError
|
| 12 |
+
from dartlab.cli.services.providers import detect_provider
|
| 13 |
+
from dartlab.cli.services.runtime import configure_dartlab
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def configure_parser(subparsers) -> None:
    """Register the ``chat`` sub-command and its options on *subparsers*.

    The created parser gets ``run`` as its ``handler`` default.
    """
    parser = subparsers.add_parser("chat", help="대화형 AI 분석 (인터랙티브 REPL)")
    parser.add_argument("company", nargs="?", default=None, help="종목코드 또는 회사명 (생략 가능)")
    parser.add_argument("--provider", "-p", default=None, choices=PROVIDERS, help="LLM provider")
    parser.add_argument("--model", "-m", default=None, help="모델명")
    parser.add_argument("--base-url", default=None, help="커스텀 API URL")
    parser.add_argument("--api-key", default=None, help="API 키")
    # "continue" is a Python keyword, so the flag is stored as ``args.cont``.
    parser.add_argument("--continue", dest="cont", action="store_true", help="이전 대화 이어가기")
    parser.set_defaults(handler=run)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ---------------------------------------------------------------------------
|
| 28 |
+
# State
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
|
| 31 |
+
@dataclass
|
| 32 |
+
class _ChatState:
|
| 33 |
+
"""REPL ์ธ์
์ํ."""
|
| 34 |
+
|
| 35 |
+
company: Any | None = None
|
| 36 |
+
stockCode: str | None = None
|
| 37 |
+
provider: str | None = None
|
| 38 |
+
model: str | None = None
|
| 39 |
+
baseUrl: str | None = None
|
| 40 |
+
apiKey: str | None = None
|
| 41 |
+
sessionId: int | None = None
|
| 42 |
+
history: list[dict[str, str]] = field(default_factory=list)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# ---------------------------------------------------------------------------
|
| 46 |
+
# Entry
|
| 47 |
+
# ---------------------------------------------------------------------------
|
| 48 |
+
|
| 49 |
+
def run(args) -> int:
    """Entry point of ``dartlab chat``: build the session state and run the REPL.

    Args:
        args: Parsed CLI namespace (company, provider, model, base_url,
            api_key, cont).

    Returns:
        0 once the REPL loop ends.

    Raises:
        CLIError: If a company argument was given but could not be loaded.
    """
    from rich.console import Console

    configure_dartlab()
    console = Console()
    provider = args.provider or detect_provider()

    state = _ChatState(
        provider=provider,
        model=args.model,
        baseUrl=args.base_url,
        apiKey=args.api_key,
    )

    if args.company:
        if not _loadCompany(state, args.company, console):
            raise CLIError(f"종목을 찾을 수 없습니다: {args.company}")

    # Resuming only applies when a company (and thus a stock code) is loaded.
    if args.cont and state.stockCode:
        _resumeSession(state, console)

    _printWelcome(state, console)
    _replLoop(state, console)
    return 0
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# ---------------------------------------------------------------------------
|
| 76 |
+
# REPL loop
|
| 77 |
+
# ---------------------------------------------------------------------------
|
| 78 |
+
|
| 79 |
+
_SLASH_WORDS = ["/help", "/company", "/model", "/clear", "/suggest", "/status", "/quit", "/exit", "/q"]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _replLoop(state: _ChatState, console) -> None:
    """Read input lines until the user quits.

    Ctrl+C at the prompt discards the current line; EOF ends the chat.
    Lines starting with ``/`` are slash commands; anything else is sent as a
    question.
    """
    promptFn = _makePromptFn()

    while True:
        prompt = _buildPrompt(state)
        try:
            userInput = promptFn(prompt)
        except KeyboardInterrupt:
            continue
        except EOFError:
            console.print("\n[dim]채팅을 종료합니다.[/]")
            break

        userInput = userInput.strip()
        if not userInput:
            continue

        if userInput.startswith("/"):
            shouldExit = _handleSlash(userInput, state, console)
            if shouldExit:
                break
            continue

        # No company loaded yet: try to detect one from the question text.
        if state.company is None:
            _tryAutoDetect(userInput, state, console)

        _executeQuery(userInput, state, console)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _makePromptFn():
|
| 113 |
+
"""prompt_toolkit PromptSession์ ๋ฐํ. ํฐ๋ฏธ๋์ด ์๋๋ฉด input() fallback."""
|
| 114 |
+
try:
|
| 115 |
+
import sys
|
| 116 |
+
|
| 117 |
+
if not sys.stdin.isatty():
|
| 118 |
+
return input
|
| 119 |
+
|
| 120 |
+
from prompt_toolkit import PromptSession
|
| 121 |
+
from prompt_toolkit.completion import WordCompleter
|
| 122 |
+
from prompt_toolkit.history import FileHistory
|
| 123 |
+
|
| 124 |
+
historyDir = Path.home() / ".dartlab"
|
| 125 |
+
historyDir.mkdir(parents=True, exist_ok=True)
|
| 126 |
+
historyFile = historyDir / "chat.history"
|
| 127 |
+
|
| 128 |
+
completer = WordCompleter(_SLASH_WORDS, sentence=True)
|
| 129 |
+
session = PromptSession(
|
| 130 |
+
history=FileHistory(str(historyFile)),
|
| 131 |
+
completer=completer,
|
| 132 |
+
)
|
| 133 |
+
return session.prompt
|
| 134 |
+
except (ImportError, RuntimeError, OSError):
|
| 135 |
+
return input
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _buildPrompt(state: _ChatState) -> str:
|
| 139 |
+
if state.company:
|
| 140 |
+
return f"\ndartlab {state.company.corpName} > "
|
| 141 |
+
return "\ndartlab > "
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# ---------------------------------------------------------------------------
|
| 145 |
+
# Query execution
|
| 146 |
+
# ---------------------------------------------------------------------------
|
| 147 |
+
|
| 148 |
+
def _executeQuery(question: str, state: _ChatState, console) -> None:
    """Send one question to the analysis runtime and render the streamed answer.

    Events from ``analyze`` are handled by kind: ``chunk`` text is appended
    and re-rendered live as Markdown, ``tool_call`` / ``tool_result`` show a
    status line, and ``error`` prints the message and aborts the query.
    Ctrl+C stops streaming but keeps whatever was received. When any answer
    text was produced, the exchange is appended to ``state.history`` and
    persisted.
    """
    from rich.live import Live
    from rich.markdown import Markdown
    from rich.text import Text

    from dartlab.ai.runtime.core import analyze

    events = analyze(
        state.company,
        question,
        provider=state.provider,
        model=state.model,
        base_url=state.baseUrl,
        api_key=state.apiKey,
        use_tools=True,
        history=state.history or None,
    )

    buffer = ""
    toolStartTime: float | None = None
    toolPanels: list[str] = []  # accumulated tool results, rendered after streaming
    queryStart = time.monotonic()

    try:
        with Live(console=console, refresh_per_second=8, vertical_overflow="visible") as live:
            for ev in events:
                if ev.kind == "chunk":
                    buffer += ev.data["text"]
                    live.update(Markdown(buffer))
                elif ev.kind == "tool_call":
                    toolName = ev.data.get("name", "")
                    label = _toolLabel(toolName)
                    toolStartTime = time.monotonic()
                    live.update(Markdown(buffer + f"\n\n> {label} 조회 중..."))
                elif ev.kind == "tool_result":
                    toolName = ev.data.get("name", "")
                    label = _toolLabel(toolName)
                    elapsed = ""
                    if toolStartTime is not None:
                        dt = time.monotonic() - toolStartTime
                        elapsed = f" ({dt:.1f}s)"
                        toolStartTime = None
                    # Collect the tool result data.
                    resultText = ev.data.get("result", "")
                    preview = _toolResultPreview(resultText)
                    statusLine = f"> {label} 완료{elapsed}"
                    if preview:
                        statusLine += f" -- {preview}"
                    # NOTE(review): indentation was lost in this view; the
                    # result is assumed to be collected regardless of whether
                    # a preview exists -- confirm against the repository.
                    toolPanels.append(resultText)
                    live.update(Markdown(buffer + f"\n\n{statusLine}"))
                elif ev.kind == "error":
                    errorMsg = ev.data.get("error", "알 수 없는 오류")
                    console.print(f"\n [red]{errorMsg}[/]")
                    return
    except KeyboardInterrupt:
        console.print("\n [dim]응답 중단[/]")

    # Show the collected tool result data (tables) inline.
    if toolPanels:
        console.print()
        for panel in toolPanels:
            _renderToolData(panel, console)

    console.print()

    # Closing summary: total elapsed time.
    totalElapsed = time.monotonic() - queryStart
    console.print(Text(f" {totalElapsed:.1f}s", style="dim"))

    if buffer:
        state.history.append({"role": "user", "content": question})
        state.history.append({"role": "assistant", "content": buffer})
        _saveMessage(state, "user", question)
        _saveMessage(state, "assistant", buffer)
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def _toolResultPreview(resultText: str) -> str:
|
| 225 |
+
"""tool ๊ฒฐ๊ณผ ํ
์คํธ์์ ํ ์ค ์์ฝ์ ์ถ์ถํ๋ค."""
|
| 226 |
+
if not resultText or resultText.startswith("[์ค๋ฅ]"):
|
| 227 |
+
return ""
|
| 228 |
+
lines = resultText.strip().splitlines()
|
| 229 |
+
# markdown ํ
์ด๋ธ์ด ์์ผ๋ฉด ํ ์ ํ์
|
| 230 |
+
tableRows = [ln for ln in lines if ln.startswith("|") and "---" not in ln]
|
| 231 |
+
if len(tableRows) > 1:
|
| 232 |
+
return f"{len(tableRows) - 1}ํ" # ํค๋ ์ ์ธ
|
| 233 |
+
# ์ผ๋ฐ ํ
์คํธ๋ฉด ์ฒซ ์ค ์๋ถ๋ถ
|
| 234 |
+
firstLine = lines[0].strip().lstrip("#").strip() if lines else ""
|
| 235 |
+
if len(firstLine) > 60:
|
| 236 |
+
firstLine = firstLine[:57] + "..."
|
| 237 |
+
return firstLine
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def _renderToolData(resultText: str, console) -> None:
    """Print a tool result as a Rich panel when it contains a Markdown table.

    Results without any line starting with ``|`` print nothing. Results
    longer than 30 lines are cut to the first 30 with a note giving the
    number of omitted lines.
    """
    from rich.markdown import Markdown
    from rich.panel import Panel

    lines = resultText.strip().splitlines()
    hasTable = any(ln.startswith("|") for ln in lines)
    if hasTable:
        # Too long: show only the head (at most 30 lines).
        if len(lines) > 30:
            truncated = "\n".join(lines[:30]) + f"\n\n... (+{len(lines) - 30}줄)"
        else:
            truncated = resultText.strip()
        console.print(Panel(Markdown(truncated), border_style="dim", padding=(0, 1)))
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
_TOOL_LABELS = {
|
| 258 |
+
"explore": "๊ณต์ ํ์",
|
| 259 |
+
"finance": "์ฌ๋ฌด ๋ฐ์ดํฐ",
|
| 260 |
+
"analyze": "๋ถ์ ์์ง",
|
| 261 |
+
"market": "์์ฅ ๋ฐ์ดํฐ",
|
| 262 |
+
"openapi": "OpenDART API",
|
| 263 |
+
"system": "์์คํ
์ ๋ณด",
|
| 264 |
+
"chart": "์ฐจํธ ์์ฑ",
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def _toolLabel(toolName: str) -> str:
|
| 269 |
+
return _TOOL_LABELS.get(toolName, toolName)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
# ---------------------------------------------------------------------------
|
| 273 |
+
# Company management
|
| 274 |
+
# ---------------------------------------------------------------------------
|
| 275 |
+
|
| 276 |
+
def _loadCompany(state: _ChatState, identifier: str, console) -> bool:
    """Load a company into the session state.

    ``dartlab.Company(identifier)`` is tried first; on ValueError,
    FileNotFoundError, OSError or RuntimeError the identifier is resolved
    with ``resolve_from_text`` instead.

    Returns:
        True when a company was loaded (``state.company`` and
        ``state.stockCode`` are set), False when none was found. On failure
        both fields are left cleared.
    """
    import dartlab

    # Drop the previous company first so it can be garbage-collected before
    # the new one is loaded. The stock code is cleared with it so a failed
    # load cannot leave a stale code behind without a company.
    state.company = None
    state.stockCode = None

    try:
        company = dartlab.Company(identifier)
    except (ValueError, FileNotFoundError, OSError, RuntimeError):
        from dartlab.core.resolve import resolve_from_text

        company, _ = resolve_from_text(identifier)

    if company is None:
        console.print(f" [red]종목을 찾을 수 없습니다: {identifier}[/]")
        return False

    state.company = company
    state.stockCode = company.stockCode
    console.print(f" [bold]{company.corpName}[/] ({company.stockCode}) 로드 완료")
    return True
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def _tryAutoDetect(userInput: str, state: _ChatState, console) -> None:
    """Try to resolve a company from free text and store it in the state.

    The state is left untouched when ``resolve_from_text`` finds no company.
    """
    from dartlab.core.resolve import resolve_from_text

    company, _ = resolve_from_text(userInput)
    if company is not None:
        state.company = company
        state.stockCode = company.stockCode
        console.print(f" [dim]{company.corpName} ({company.stockCode}) 자동 감지[/]")
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
# ---------------------------------------------------------------------------
|
| 309 |
+
# Slash commands
|
| 310 |
+
# ---------------------------------------------------------------------------
|
| 311 |
+
|
| 312 |
+
def _handleSlash(userInput: str, state: _ChatState, console) -> bool:
    """Dispatch one ``/command`` line.

    Args:
        userInput: Non-empty input line starting with ``/``; text after the
            first whitespace is passed to the handler as its argument.

    Returns:
        True when the REPL should exit (``/quit``, ``/exit``, ``/q``),
        otherwise False. Unknown commands print a hint and return False.
    """
    parts = userInput.split(maxsplit=1)
    cmd = parts[0].lower()
    arg = parts[1].strip() if len(parts) > 1 else ""

    if cmd in ("/quit", "/exit", "/q"):
        console.print("[dim]채팅을 종료합니다.[/]")
        return True

    handlers = {
        "/help": _cmdHelp,
        "/company": _cmdCompany,
        "/model": _cmdModel,
        "/clear": _cmdClear,
        "/suggest": _cmdSuggest,
        "/status": _cmdStatus,
    }

    handler = handlers.get(cmd)
    if handler:
        handler(arg, state, console)
    else:
        console.print(f" [yellow]알 수 없는 명령: {cmd}[/] /help 로 사용법 확인")

    return False
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def _cmdHelp(_arg: str, _state: _ChatState, console) -> None:
|
| 340 |
+
console.print("""
|
| 341 |
+
[bold]๋ช
๋ น์ด[/]
|
| 342 |
+
/help ์ด ๋์๋ง
|
| 343 |
+
/company <์ด๋ฆ/์ฝ๋> ์ข
๋ชฉ ๋ณ๊ฒฝ
|
| 344 |
+
/model <์ด๋ฆ> ๋ชจ๋ธ/provider ๋ณ๊ฒฝ
|
| 345 |
+
/clear ๋ํ ๊ธฐ๋ก ์ด๊ธฐํ
|
| 346 |
+
/suggest ์ถ์ฒ ์ง๋ฌธ
|
| 347 |
+
/status ํ์ฌ ์ค์
|
| 348 |
+
/quit ์ข
๋ฃ
|
| 349 |
+
""")
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def _cmdCompany(arg: str, state: _ChatState, console) -> None:
|
| 353 |
+
if not arg:
|
| 354 |
+
if state.company:
|
| 355 |
+
console.print(f" ํ์ฌ: [bold]{state.company.corpName}[/] ({state.stockCode})")
|
| 356 |
+
else:
|
| 357 |
+
console.print(" [dim]๋ก๋๋ ์ข
๋ชฉ์ด ์์ต๋๋ค. /company ์ผ์ฑ์ ์[/]")
|
| 358 |
+
return
|
| 359 |
+
|
| 360 |
+
hadCompany = state.company is not None
|
| 361 |
+
if _loadCompany(state, arg, console):
|
| 362 |
+
if hadCompany:
|
| 363 |
+
state.history.clear()
|
| 364 |
+
state.sessionId = None
|
| 365 |
+
console.print(" [dim]์ข
๋ชฉ ๋ณ๊ฒฝ์ผ๋ก ๋ํ ๊ธฐ๋ก์ด ์ด๊ธฐํ๋์์ต๋๋ค.[/]")
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def _cmdModel(arg: str, state: _ChatState, console) -> None:
|
| 369 |
+
if not arg:
|
| 370 |
+
console.print(f" provider: [bold]{state.provider}[/]")
|
| 371 |
+
console.print(f" model: {state.model or '(๊ธฐ๋ณธ๊ฐ)'}")
|
| 372 |
+
return
|
| 373 |
+
|
| 374 |
+
if arg in PROVIDERS:
|
| 375 |
+
state.provider = arg
|
| 376 |
+
state.model = None
|
| 377 |
+
console.print(f" provider -> [bold]{arg}[/]")
|
| 378 |
+
else:
|
| 379 |
+
state.model = arg
|
| 380 |
+
console.print(f" model -> [bold]{arg}[/]")
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def _cmdClear(_arg: str, state: _ChatState, console) -> None:
|
| 384 |
+
state.history.clear()
|
| 385 |
+
state.sessionId = None
|
| 386 |
+
console.print(" [dim]๋ํ ๊ธฐ๋ก์ด ์ด๊ธฐํ๋์์ต๋๋ค.[/]")
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
def _cmdSuggest(_arg: str, state: _ChatState, console) -> None:
|
| 390 |
+
if state.company is None:
|
| 391 |
+
console.print(" [dim]์ข
๋ชฉ์ ๋จผ์ ๋ก๋ํ์ธ์. /company ์ผ์ฑ์ ์[/]")
|
| 392 |
+
return
|
| 393 |
+
|
| 394 |
+
from dartlab.ai.conversation.suggestions import suggestQuestions
|
| 395 |
+
|
| 396 |
+
questions = suggestQuestions(state.company)
|
| 397 |
+
for i, q in enumerate(questions, 1):
|
| 398 |
+
console.print(f" [cyan]{i}.[/] {q}")
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def _cmdStatus(_arg: str, state: _ChatState, console) -> None:
|
| 402 |
+
console.print(f" provider: [bold]{state.provider}[/]")
|
| 403 |
+
console.print(f" model: {state.model or '(๊ธฐ๋ณธ๊ฐ)'}")
|
| 404 |
+
if state.company:
|
| 405 |
+
console.print(f" company: [bold]{state.company.corpName}[/] ({state.stockCode})")
|
| 406 |
+
else:
|
| 407 |
+
console.print(" company: (์์)")
|
| 408 |
+
console.print(f" history: {len(state.history)}๊ฐ ๋ฉ์์ง")
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
# ---------------------------------------------------------------------------
|
| 412 |
+
# Session persistence
|
| 413 |
+
# ---------------------------------------------------------------------------
|
| 414 |
+
|
| 415 |
+
def _saveMessage(state: _ChatState, role: str, content: str) -> None:
    """Persist one message, creating the session on first use.

    The session is created lazily under the current stock code, or under
    ``"__no_company__"`` when no company is loaded. Persistence is best
    effort: OSError and ImportError are swallowed on purpose so that a
    history failure does not interrupt the chat.
    """
    try:
        from dartlab.cli.services.history import add_message, create_session

        if state.sessionId is None:
            stockCode = state.stockCode or "__no_company__"
            state.sessionId = create_session(stockCode)
        add_message(state.sessionId, role, content)
    except (OSError, ImportError):
        pass
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def _resumeSession(state: _ChatState, console) -> None:
    """Restore the latest saved session for the current stock code.

    When ``get_latest_session`` returns a session id, the id and its
    messages are loaded into the state and a notice is printed. OSError and
    ImportError are swallowed on purpose; the chat then starts fresh.
    """
    try:
        from dartlab.cli.services.history import get_latest_session, get_messages

        sessionId = get_latest_session(state.stockCode)
        if sessionId:
            state.sessionId = sessionId
            state.history = get_messages(sessionId)
            console.print(f" [dim]이전 대화 이어가기 (메시지 {len(state.history)}개)[/]")
    except (OSError, ImportError):
        pass
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
# ---------------------------------------------------------------------------
|
| 441 |
+
# Welcome
|
| 442 |
+
# ---------------------------------------------------------------------------
|
| 443 |
+
|
| 444 |
+
def _printWelcome(state: _ChatState, console) -> None:
|
| 445 |
+
console.print()
|
| 446 |
+
console.print(" [bold cyan]DartLab Chat[/] -- ๋ํํ AI ๊ธฐ์
๋ถ์")
|
| 447 |
+
providerLine = f" [dim]provider: {state.provider}"
|
| 448 |
+
if state.model:
|
| 449 |
+
providerLine += f" / {state.model}"
|
| 450 |
+
providerLine += "[/]"
|
| 451 |
+
console.print(providerLine)
|
| 452 |
+
console.print()
|
| 453 |
+
|
| 454 |
+
if state.company:
|
| 455 |
+
console.print(f" [bold]{state.company.corpName}[/] ({state.stockCode})")
|
| 456 |
+
try:
|
| 457 |
+
from dartlab.ai.conversation.suggestions import suggestQuestions
|
| 458 |
+
|
| 459 |
+
questions = suggestQuestions(state.company)
|
| 460 |
+
if questions:
|
| 461 |
+
console.print()
|
| 462 |
+
console.print(" [dim]์ถ์ฒ ์ง๋ฌธ:[/]")
|
| 463 |
+
for q in questions[:4]:
|
| 464 |
+
console.print(f" [dim]-[/] {q}")
|
| 465 |
+
except (ImportError, AttributeError):
|
| 466 |
+
pass
|
| 467 |
+
else:
|
| 468 |
+
console.print(" [dim]์ข
๋ชฉ ์์ด ์์ํฉ๋๋ค. ์ง๋ฌธ์ ์ข
๋ชฉ๋ช
์ ํฌํจํ๊ฑฐ๋ /company ๋ช
๋ น์ ์ฌ์ฉํ์ธ์.[/]")
|
| 469 |
+
|
| 470 |
+
console.print()
|
| 471 |
+
console.print(" [dim]/help ์ฌ์ฉ๋ฒ | /quit ์ข
๋ฃ | Ctrl+C ์
๋ ฅ ์ทจ์[/]")
|
| 472 |
+
console.print()
|
src/dartlab/cli/commands/collect.py
CHANGED
|
@@ -119,6 +119,20 @@ def configure_parser(subparsers) -> None:
|
|
| 119 |
action="store_true",
|
| 120 |
help="๋๋ฝ ๊ณต์๋ง ์ฆ๋ถ ์์ง (DART)",
|
| 121 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# EDGAR ์ ์ฉ
|
| 123 |
parser.add_argument(
|
| 124 |
"--tier",
|
|
@@ -139,6 +153,10 @@ def run(args) -> int:
|
|
| 139 |
if source == "edgar":
|
| 140 |
return _runEdgar(console, args)
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
# --- DART ---
|
| 143 |
if getattr(args, "check", False):
|
| 144 |
return _runCheck(console, args)
|
|
@@ -178,12 +196,45 @@ def _printHelp(console) -> None:
|
|
| 178 |
console.print(" dartlab collect --batch ์ ์ฒด ์์ฅ ๋ฐฐ์น ์์ง")
|
| 179 |
console.print(" dartlab collect --stats ์์ง ํํฉ")
|
| 180 |
console.print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
console.print(" [bold]EDGAR[/] (ticker = ์๋ฌธ โ ์๋ ๊ฐ์ง):")
|
| 182 |
console.print(" dartlab collect AAPL MSFT ์ง์ ticker ์์ง")
|
| 183 |
console.print(" dartlab collect --tier sp500 S&P 500 ์ ์ฒด ์์ง")
|
| 184 |
console.print(" dartlab collect --tier sp500 --limit 10 10๊ฐ๋ง ํ
์คํธ")
|
| 185 |
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
# โโ EDGAR โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 188 |
|
| 189 |
|
|
|
|
| 119 |
action="store_true",
|
| 120 |
help="๋๋ฝ ๊ณต์๋ง ์ฆ๋ถ ์์ง (DART)",
|
| 121 |
)
|
| 122 |
+
# scan ํ๋ฆฌ๋น๋
|
| 123 |
+
parser.add_argument(
|
| 124 |
+
"--scan",
|
| 125 |
+
nargs="?",
|
| 126 |
+
const="all",
|
| 127 |
+
default=None,
|
| 128 |
+
help="์ ์ข
๋ชฉ scan ํ๋ฆฌ๋น๋ (all/changes/finance/report)",
|
| 129 |
+
)
|
| 130 |
+
parser.add_argument(
|
| 131 |
+
"--since-year",
|
| 132 |
+
type=int,
|
| 133 |
+
default=2021,
|
| 134 |
+
help="scan ํ๋ฆฌ๋น๋ ์์ ์ฐ๋ (๊ธฐ๋ณธ 2021)",
|
| 135 |
+
)
|
| 136 |
# EDGAR ์ ์ฉ
|
| 137 |
parser.add_argument(
|
| 138 |
"--tier",
|
|
|
|
| 153 |
if source == "edgar":
|
| 154 |
return _runEdgar(console, args)
|
| 155 |
|
| 156 |
+
# --- scan ํ๋ฆฌ๋น๋ ---
|
| 157 |
+
if getattr(args, "scan", None):
|
| 158 |
+
return _runScan(console, args)
|
| 159 |
+
|
| 160 |
# --- DART ---
|
| 161 |
if getattr(args, "check", False):
|
| 162 |
return _runCheck(console, args)
|
|
|
|
| 196 |
console.print(" dartlab collect --batch ์ ์ฒด ์์ฅ ๋ฐฐ์น ์์ง")
|
| 197 |
console.print(" dartlab collect --stats ์์ง ํํฉ")
|
| 198 |
console.print()
|
| 199 |
+
console.print(" [bold]scan ํ๋ฆฌ๋น๋[/]:")
|
| 200 |
+
console.print(" dartlab collect --scan ์ ์ข
๋ชฉ ํก๋จ๋ถ์ ํ๋ฆฌ๋น๋ (changes+finance+report)")
|
| 201 |
+
console.print(" dartlab collect --scan changes changes๋ง ํ๋ฆฌ๋น๋")
|
| 202 |
+
console.print(" dartlab collect --scan finance finance๋ง ํ๋ฆฌ๋น๋")
|
| 203 |
+
console.print(" dartlab collect --scan report report๋ง ํ๋ฆฌ๋น๋")
|
| 204 |
+
console.print()
|
| 205 |
console.print(" [bold]EDGAR[/] (ticker = ์๋ฌธ โ ์๋ ๊ฐ์ง):")
|
| 206 |
console.print(" dartlab collect AAPL MSFT ์ง์ ticker ์์ง")
|
| 207 |
console.print(" dartlab collect --tier sp500 S&P 500 ์ ์ฒด ์์ง")
|
| 208 |
console.print(" dartlab collect --tier sp500 --limit 10 10๊ฐ๋ง ํ
์คํธ")
|
| 209 |
|
| 210 |
|
| 211 |
+
# โโ scan ํ๋ฆฌ๋น๋ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _runScan(console, args) -> int:
    """Run the all-company scan prebuild for the requested target.

    Args:
        console: Rich console used for output.
        args: Parsed CLI namespace; ``scan`` selects the target
            (all/changes/finance/report) and ``since_year`` the start year
            (default 2021).

    Returns:
        0 after the selected build ran, 1 for an unknown target.
    """
    from dartlab.market.scan.builder import buildChanges, buildFinance, buildReport, buildScan

    target = getattr(args, "scan", "all")
    sinceYear = getattr(args, "since_year", 2021)

    console.print(f"[bold]scan 프리빌드[/] target={target}, sinceYear={sinceYear}")

    # Target name -> builder; every builder takes the same keyword arguments.
    builders = {
        "all": buildScan,
        "changes": buildChanges,
        "finance": buildFinance,
        "report": buildReport,
    }
    build = builders.get(target)
    if build is None:
        console.print(f"[red]알 수 없는 scan 타겟: {target}[/]")
        return 1

    build(sinceYear=sinceYear, verbose=True)
    return 0
|
| 236 |
+
|
| 237 |
+
|
| 238 |
# โโ EDGAR โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 239 |
|
| 240 |
|
src/dartlab/cli/parser.py
CHANGED
|
@@ -19,6 +19,7 @@ COMMAND_SPECS = (
|
|
| 19 |
CommandSpec("modules", "dartlab.cli.commands.modules"),
|
| 20 |
# AI / ๋ด๋ณด๋ด๊ธฐ
|
| 21 |
CommandSpec("ask", "dartlab.cli.commands.ask"),
|
|
|
|
| 22 |
CommandSpec("report", "dartlab.cli.commands.report"),
|
| 23 |
CommandSpec("excel", "dartlab.cli.commands.excel"),
|
| 24 |
# ๋ถ์
|
|
|
|
| 19 |
CommandSpec("modules", "dartlab.cli.commands.modules"),
|
| 20 |
# AI / ๋ด๋ณด๋ด๊ธฐ
|
| 21 |
CommandSpec("ask", "dartlab.cli.commands.ask"),
|
| 22 |
+
CommandSpec("chat", "dartlab.cli.commands.chat"),
|
| 23 |
CommandSpec("report", "dartlab.cli.commands.report"),
|
| 24 |
CommandSpec("excel", "dartlab.cli.commands.excel"),
|
| 25 |
# ๋ถ์
|
src/dartlab/core/dataConfig.py
CHANGED
|
@@ -22,6 +22,10 @@ DATA_RELEASES: dict[str, dict] = {
|
|
| 22 |
"dir": "dart/report",
|
| 23 |
"label": "์ ๊ธฐ๋ณด๊ณ ์ ๋ฐ์ดํฐ",
|
| 24 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"edgarDocs": {
|
| 26 |
"dir": "edgar/docs",
|
| 27 |
"label": "SEC EDGAR ๊ณต์ ๋ฌธ์ ๋ฐ์ดํฐ",
|
|
|
|
| 22 |
"dir": "dart/report",
|
| 23 |
"label": "์ ๊ธฐ๋ณด๊ณ ์ ๋ฐ์ดํฐ",
|
| 24 |
},
|
| 25 |
+
"scan": {
|
| 26 |
+
"dir": "dart/scan",
|
| 27 |
+
"label": "์ ์ข
๋ชฉ ํก๋จ๋ถ์ ํ๋ฆฌ๋น๋ ๋ฐ์ดํฐ",
|
| 28 |
+
},
|
| 29 |
"edgarDocs": {
|
| 30 |
"dir": "edgar/docs",
|
| 31 |
"label": "SEC EDGAR ๊ณต์ ๋ฌธ์ ๋ฐ์ดํฐ",
|
src/dartlab/core/dataLoader.py
CHANGED
|
@@ -300,11 +300,13 @@ def downloadAll(category: str = "docs", *, forceUpdate: bool = False) -> None:
|
|
| 300 |
lastErr = None
|
| 301 |
for attempt in range(_HF_MAX_RETRIES):
|
| 302 |
try:
|
|
|
|
|
|
|
| 303 |
snapshot_download(
|
| 304 |
repo_id=HF_REPO,
|
| 305 |
repo_type="dataset",
|
| 306 |
local_dir=str(localDir),
|
| 307 |
-
allow_patterns=
|
| 308 |
force_download=forceUpdate if attempt == 0 else False,
|
| 309 |
)
|
| 310 |
break
|
|
@@ -320,7 +322,8 @@ def downloadAll(category: str = "docs", *, forceUpdate: bool = False) -> None:
|
|
| 320 |
f"๋ง์ง๋ง ์๋ฌ: {lastErr}"
|
| 321 |
)
|
| 322 |
|
| 323 |
-
|
|
|
|
| 324 |
emit("download_all:hf_done", label=label, count=count, dataDir=str(dataDir))
|
| 325 |
|
| 326 |
|
|
|
|
| 300 |
lastErr = None
|
| 301 |
for attempt in range(_HF_MAX_RETRIES):
|
| 302 |
try:
|
| 303 |
+
# scan์ ํ์ ํด๋(report/)๋ ํฌํจํ๋ฏ๋ก ** ํจํด ์ฌ์ฉ
|
| 304 |
+
pattern = f"{hfDir}/**/*.parquet" if category == "scan" else f"{hfDir}/*.parquet"
|
| 305 |
snapshot_download(
|
| 306 |
repo_id=HF_REPO,
|
| 307 |
repo_type="dataset",
|
| 308 |
local_dir=str(localDir),
|
| 309 |
+
allow_patterns=pattern,
|
| 310 |
force_download=forceUpdate if attempt == 0 else False,
|
| 311 |
)
|
| 312 |
break
|
|
|
|
| 322 |
f"๋ง์ง๋ง ์๋ฌ: {lastErr}"
|
| 323 |
)
|
| 324 |
|
| 325 |
+
globPattern = "**/*.parquet" if category == "scan" else "*.parquet"
|
| 326 |
+
count = len(list(dataDir.glob(globPattern)))
|
| 327 |
emit("download_all:hf_done", label=label, count=count, dataDir=str(dataDir))
|
| 328 |
|
| 329 |
|
src/dartlab/market/_helpers.py
CHANGED
|
@@ -10,11 +10,25 @@ import polars as pl
|
|
| 10 |
def scan_parquets(api_type: str, keep_cols: list[str]) -> pl.DataFrame:
|
| 11 |
"""report parquet์์ ํน์ apiType๋ง LazyFrame ์ค์บ.
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
"""
|
| 16 |
from dartlab.core.dataLoader import _dataDir
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
report_dir = Path(_dataDir("report"))
|
| 19 |
parquet_files = sorted(report_dir.glob("*.parquet"))
|
| 20 |
|
|
@@ -121,6 +135,55 @@ def parse_date_year(s) -> int | None:
|
|
| 121 |
return None
|
| 122 |
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
def scan_finance_parquets(
|
| 125 |
statement: str,
|
| 126 |
account_ids: set[str],
|
|
@@ -130,16 +193,26 @@ def scan_finance_parquets(
|
|
| 130 |
) -> dict[str, float]:
|
| 131 |
"""finance parquet ์ ์ ์ค์บ โ {์ข
๋ชฉ์ฝ๋: ๊ฐ}.
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
"""
|
| 136 |
from dartlab.core.dataLoader import _dataDir
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
finance_dir = Path(_dataDir("finance"))
|
| 139 |
parquet_files = sorted(finance_dir.glob("*.parquet"))
|
| 140 |
|
| 141 |
result: dict[str, float] = {}
|
| 142 |
-
sj_divs = [statement] if statement != "IS" else ["IS", "CIS"]
|
| 143 |
for pf in parquet_files:
|
| 144 |
code = pf.stem
|
| 145 |
try:
|
|
|
|
| 10 |
def scan_parquets(api_type: str, keep_cols: list[str]) -> pl.DataFrame:
|
| 11 |
"""report parquet์์ ํน์ apiType๋ง LazyFrame ์ค์บ.
|
| 12 |
|
| 13 |
+
scan/report/{apiType}.parquet ํ๋ฆฌ๋น๋๊ฐ ์์ผ๋ฉด ๋จ์ผ ํ์ผ์์ ์ฆ์ ๋ก๋.
|
| 14 |
+
์์ผ๋ฉด ์ข
๋ชฉ๋ณ parquet ์ํ (fallback).
|
| 15 |
"""
|
| 16 |
from dartlab.core.dataLoader import _dataDir
|
| 17 |
|
| 18 |
+
# 1์์: ํ๋ฆฌ๋น๋ scan parquet
|
| 19 |
+
scan_path = Path(_dataDir("scan")) / "report" / f"{api_type}.parquet"
|
| 20 |
+
if scan_path.exists():
|
| 21 |
+
try:
|
| 22 |
+
lf = pl.scan_parquet(str(scan_path))
|
| 23 |
+
schema_names = lf.collect_schema().names()
|
| 24 |
+
available = [c for c in keep_cols if c in schema_names]
|
| 25 |
+
non_meta = [c for c in available if c not in ("stockCode", "year", "quarter")]
|
| 26 |
+
if non_meta:
|
| 27 |
+
return lf.select(available).collect()
|
| 28 |
+
except (pl.exceptions.PolarsError, OSError):
|
| 29 |
+
pass # fallback to per-file scan
|
| 30 |
+
|
| 31 |
+
# 2์์: ์ข
๋ชฉ๋ณ ์ํ (fallback)
|
| 32 |
report_dir = Path(_dataDir("report"))
|
| 33 |
parquet_files = sorted(report_dir.glob("*.parquet"))
|
| 34 |
|
|
|
|
| 135 |
return None
|
| 136 |
|
| 137 |
|
| 138 |
+
def _scanFinanceFromMerged(
    scanPath: Path,
    sjDivs: list[str],
    accountIds: set[str],
    accountNms: set[str],
    amountCol: str,
) -> dict[str, float]:
    """Extract one latest-year value per stock from the merged finance parquet.

    Args:
        scanPath: Path of the merged (all-stock) finance parquet.
        sjDivs: Statement divisions (``sj_div`` values) to keep.
        accountIds: ``account_id`` values that identify the wanted account.
        accountNms: ``account_nm`` values accepted as an alternative match.
        amountCol: Name of the column holding the amount to parse.

    Returns:
        Mapping of stock code to the parsed amount. The first matching row
        whose amount parses to a number wins for each stock. Empty when no
        row passes the statement filter or the file has no ``account_id``.

    Raises:
        polars.exceptions.PolarsError: When a required column is missing;
            the caller treats this as "fall back to the per-stock scan".
    """
    lf = pl.scan_parquet(str(scanPath))
    # The merged file may carry either spelling of the stock-code column;
    # read the schema once instead of re-opening the file.
    scCol = "stockCode" if "stockCode" in lf.collect_schema().names() else "stock_code"

    isConsolidated = pl.col("fs_nm").str.contains("연결")
    target = lf.filter(
        pl.col("sj_div").is_in(sjDivs)
        & (isConsolidated | pl.col("fs_nm").str.contains("재무제표"))
    ).collect()

    if target.is_empty() or "account_id" not in target.columns:
        return {}

    # Prefer consolidated statements *per stock*: a stock keeps its
    # non-consolidated rows only when it has no consolidated row at all.
    # (A file-wide fallback would drop every separate-only stock as soon as
    # a single other stock reports consolidated statements.)
    target = target.filter(isConsolidated | ~isConsolidated.any().over(scCol))

    # Keep only each stock's most recent business year.
    target = target.filter(pl.col("bsns_year") == pl.col("bsns_year").max().over(scCol))

    # Account match by id or by name.
    matched = target.filter(
        pl.col("account_id").is_in(list(accountIds)) | pl.col("account_nm").is_in(list(accountNms))
    )

    result: dict[str, float] = {}
    for row in matched.iter_rows(named=True):
        code = row.get(scCol, "")
        if not code or code in result:
            continue
        val = parse_num(row.get(amountCol))
        if val is not None:
            result[code] = val

    return result
|
| 185 |
+
|
| 186 |
+
|
| 187 |
def scan_finance_parquets(
|
| 188 |
statement: str,
|
| 189 |
account_ids: set[str],
|
|
|
|
| 193 |
) -> dict[str, float]:
|
| 194 |
"""finance parquet ์ ์ ์ค์บ โ {์ข
๋ชฉ์ฝ๋: ๊ฐ}.
|
| 195 |
|
| 196 |
+
scan/finance.parquet ํ๋ฆฌ๋น๋๊ฐ ์์ผ๋ฉด ๋จ์ผ ํ์ผ์์ ์ฆ์ ํํฐ.
|
| 197 |
+
์์ผ๋ฉด ์ข
๋ชฉ๋ณ parquet ์ํ (fallback).
|
| 198 |
"""
|
| 199 |
from dartlab.core.dataLoader import _dataDir
|
| 200 |
|
| 201 |
+
sj_divs = [statement] if statement != "IS" else ["IS", "CIS"]
|
| 202 |
+
|
| 203 |
+
# 1์์: ํ๋ฆฌ๋น๋ scan parquet
|
| 204 |
+
scan_path = Path(_dataDir("scan")) / "finance.parquet"
|
| 205 |
+
if scan_path.exists():
|
| 206 |
+
try:
|
| 207 |
+
return _scanFinanceFromMerged(scan_path, sj_divs, account_ids, account_nms, amount_col)
|
| 208 |
+
except (pl.exceptions.PolarsError, OSError):
|
| 209 |
+
pass # fallback
|
| 210 |
+
|
| 211 |
+
# 2์์: ์ข
๋ชฉ๋ณ ์ํ (fallback)
|
| 212 |
finance_dir = Path(_dataDir("finance"))
|
| 213 |
parquet_files = sorted(finance_dir.glob("*.parquet"))
|
| 214 |
|
| 215 |
result: dict[str, float] = {}
|
|
|
|
| 216 |
for pf in parquet_files:
|
| 217 |
code = pf.stem
|
| 218 |
try:
|
src/dartlab/market/scan/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์ ์ข
๋ชฉ ํก๋จ๋ถ์ ํ๋ฆฌ๋น๋ โ changes + finance + report ํฉ์ฐ parquet."""
|
| 2 |
+
|
| 3 |
+
from dartlab.market.scan.builder import buildScan, buildChanges, buildFinance, buildReport
|
| 4 |
+
|
| 5 |
+
__all__ = ["buildScan", "buildChanges", "buildFinance", "buildReport"]
|
src/dartlab/market/scan/builder.py
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์ ์ข
๋ชฉ scan ํ๋ฆฌ๋น๋ ๋น๋.
|
| 2 |
+
|
| 3 |
+
docs โ changes, finance โ ํฉ์ฐ, report โ apiType๋ณ ๋ถ๋ฆฌ.
|
| 4 |
+
์คํ 014/015์์ ๊ฒ์ฆ๋ ๋ก์ง์ ํ๋ก๋์
ํ.
|
| 5 |
+
๋ฐฐ์น๋ฅผ ์ค๊ฐ ํ์ผ๋ก ์ฐ๊ณ ๋ง์ง๋ง์ ํฉ์ฐํ์ฌ segfault ๋ฐฉ์ง.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import shutil
|
| 11 |
+
import time
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
import polars as pl
|
| 15 |
+
|
| 16 |
+
# The 10 apiTypes that the scanner actually uses
SCAN_API_TYPES = [
    "majorHolder",
    "executive",
    "employee",
    "executivePayAllTotal",
    "executivePayIndividual",
    "auditOpinion",
    "dividend",
    "treasuryStock",
    "capitalChange",
    "corporateBond",
]

# Number of buffered per-file frames that triggers writing one batch parquet
_BATCH = 200
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _scanDir() -> Path:
    """Return the scan output directory (the ``"scan"`` data category)."""
    from dartlab.core.dataLoader import _dataDir

    scanRoot = _dataDir("scan")
    return Path(scanRoot)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _docsDir() -> Path:
    """Return the directory resolved by ``_dataDir`` for the ``"docs"`` category."""
    from dartlab.core.dataLoader import _dataDir

    docsRoot = _dataDir("docs")
    return Path(docsRoot)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _financeDir() -> Path:
    """Return the directory resolved by ``_dataDir`` for the ``"finance"`` category."""
    from dartlab.core.dataLoader import _dataDir

    financeRoot = _dataDir("finance")
    return Path(financeRoot)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _reportDir() -> Path:
    """Return the directory resolved by ``_dataDir`` for the ``"report"`` category."""
    from dartlab.core.dataLoader import _dataDir

    reportRoot = _dataDir("report")
    return Path(reportRoot)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _log(msg: str) -> None:
    """Write one progress message to standard output via ``print``."""
    print(msg)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _mergeBatchFiles(batchDir: Path, outputPath: Path, *, how: str = "vertical") -> int:
    """Combine every ``batch_*.parquet`` in ``batchDir`` into one parquet file.

    Args:
        batchDir: Directory holding the intermediate batch files.
        outputPath: Destination parquet, written with zstd compression.
        how: Concatenation strategy forwarded to ``pl.concat``.

    Returns:
        Total number of rows written, or 0 when there is no batch file
        (nothing is written in that case).
    """
    batchPaths = sorted(batchDir.glob("batch_*.parquet"))
    if len(batchPaths) == 0:
        return 0

    # Batch files are read in name order so the merged row order is stable.
    frames = []
    for batchPath in batchPaths:
        frames.append(pl.read_parquet(str(batchPath)))

    combined = pl.concat(frames, how=how)
    combined.write_parquet(str(outputPath), compression="zstd")
    return combined.height
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# โโ changes โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _buildRawChanges(parquetPath: Path, stockCode: str, sinceYear: int = 2021) -> pl.DataFrame | None:
    """Turn one raw docs parquet into section-level change rows.

    Each section (keyed by ``section_order`` + ``section_title``) is compared
    with its previous row in year order, and only rows whose content differs
    are kept and classified.

    Args:
        parquetPath: Raw docs parquet of a single stock.
        stockCode: Value written to the ``stockCode`` column of every row.
        sinceYear: First year reported as ``toPeriod``. The year before it is
            still loaded so it can serve as the comparison base.

    Returns:
        A frame with columns fromPeriod, toPeriod, sectionTitle, changeType,
        sizeA, sizeB, sizeDelta, preview, stockCode. ``None`` when the file
        cannot be read, lacks a required column, has fewer than two rows in
        range, or shows no change at all.
    """
    requiredCols = ["year", "section_order", "section_title", "section_content"]
    sectionKey = ["section_order", "section_title"]
    numPattern = r"[\d,.]+"

    # Reusable expressions
    yearInt = pl.col("year").cast(pl.Utf8).str.to_integer(strict=False)
    current = pl.col("section_content")
    previous = pl.col("_prevContent")
    sizeGap = pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)

    try:
        docs = pl.read_parquet(str(parquetPath))
    except (pl.exceptions.PolarsError, OSError):
        return None

    if any(name not in docs.columns for name in requiredCols):
        return None

    docs = docs.filter(yearInt >= sinceYear - 1)
    if docs.height < 2:
        return None

    # Pair every row with the previous row of the same section.
    paired = (
        docs.select(requiredCols)
        .sort([*sectionKey, "year"])
        .with_columns([
            pl.col("year").shift(1).over(sectionKey).alias("_prevYear"),
            current.shift(1).over(sectionKey).alias("_prevContent"),
        ])
        .with_columns([
            current.hash().alias("_hash"),
            previous.hash().alias("_prevHash"),
            current.str.len_chars().alias("sizeB"),
            previous.str.len_chars().alias("sizeA"),
            current.str.slice(0, 200).alias("preview"),
        ])
    )

    # Keep rows that have a predecessor, are not null on both sides, and
    # whose content differs (or one side is missing).
    changed = paired.filter(
        pl.col("_prevYear").is_not_null()
        & (current.is_not_null() | previous.is_not_null())
        & (
            (pl.col("_hash") != pl.col("_prevHash"))
            | current.is_null()
            | previous.is_null()
        )
    )
    if changed.height == 0:
        return None

    # Digit runs are masked so that text differing only in numbers compares equal.
    changed = changed.with_columns([
        current.str.replace_all(numPattern, "N").alias("_stripped"),
        previous.str.replace_all(numPattern, "N").alias("_prevStripped"),
    ])

    changeType = (
        pl.when(previous.is_null())
        .then(pl.lit("appeared"))
        .when(current.is_null())
        .then(pl.lit("disappeared"))
        .when(pl.col("_stripped") == pl.col("_prevStripped"))
        .then(pl.lit("numeric"))
        .when(
            (pl.col("sizeA") > 0)
            & (sizeGap.abs().cast(pl.Float64) / pl.col("sizeA").cast(pl.Float64) > 0.5)
        )
        .then(pl.lit("structural"))
        .otherwise(pl.lit("wording"))
        .alias("changeType")
    )
    changed = changed.with_columns(changeType)

    # The base year (sinceYear - 1) is only a comparison source, never a target.
    changed = changed.filter(yearInt >= sinceYear)

    return changed.select([
        pl.col("_prevYear").alias("fromPeriod"),
        pl.col("year").alias("toPeriod"),
        pl.col("section_title").alias("sectionTitle"),
        pl.col("changeType"),
        pl.col("sizeA"),
        pl.col("sizeB"),
        sizeGap.alias("sizeDelta"),
        pl.col("preview"),
        pl.lit(stockCode).alias("stockCode"),
    ])
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def buildChanges(*, sinceYear: int = 2021, verbose: bool = True) -> Path | None:
    """Prebuild ``changes.parquet`` from every per-stock docs parquet.

    Per-stock change frames are buffered, written to intermediate batch files
    every ``_BATCH`` frames, and merged into a single output at the end.

    Args:
        sinceYear: First year reported as a change target.
        verbose: When true, progress is reported through ``_log``.

    Returns:
        Path of the written parquet, or ``None`` when there is no docs
        parquet or no stock produced any change row.
    """
    docsDir = _docsDir()
    outDir = _scanDir()
    outDir.mkdir(parents=True, exist_ok=True)
    outputPath = outDir / "changes.parquet"

    allFiles = sorted(docsDir.glob("*.parquet"))
    if not allFiles:
        if verbose:
            _log("docs parquet 없음 — changes 빌드 건너뜀")
        return None

    if verbose:
        _log(f"[changes] {len(allFiles)}종목, sinceYear={sinceYear}")

    # The temp dir is created only once there is work to do, and any batch
    # files left behind by an interrupted run are purged first so they cannot
    # leak into this merge.
    batchDir = outDir / "_tmp_changes"
    shutil.rmtree(batchDir, ignore_errors=True)
    batchDir.mkdir(parents=True, exist_ok=True)

    t0 = time.perf_counter()
    batchChunks: list[pl.DataFrame] = []
    success = 0
    failed = 0  # files that were unreadable OR simply had no change rows
    totalRows = 0
    batchIdx = 0

    try:
        for i, pf in enumerate(allFiles):
            result = _buildRawChanges(pf, pf.stem, sinceYear)
            if result is not None and result.height > 0:
                batchChunks.append(result)
                totalRows += result.height
                success += 1
            else:
                failed += 1

            if len(batchChunks) >= _BATCH:
                pl.concat(batchChunks).write_parquet(
                    str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd"
                )
                batchChunks = []
                batchIdx += 1

            if verbose and (i + 1) % 500 == 0:
                _log(f" [{i+1}/{len(allFiles)}] {success}ok {failed}fail {totalRows:,}rows {time.perf_counter()-t0:.0f}s")

        # Flush the final, partially filled batch.
        if batchChunks:
            pl.concat(batchChunks).write_parquet(
                str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd"
            )
            batchChunks = []
            batchIdx += 1

        if batchIdx == 0:
            if verbose:
                _log(" changes 결과 없음")
            return None

        _mergeBatchFiles(batchDir, outputPath)
    finally:
        # Always remove the intermediate files, including on error.
        shutil.rmtree(batchDir, ignore_errors=True)

    elapsed = time.perf_counter() - t0
    diskMb = outputPath.stat().st_size / 1024 / 1024
    if verbose:
        _log(f" 완료: {success}종목, {totalRows:,}행, {diskMb:.1f}MB, {elapsed:.0f}초")

    return outputPath
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
# โโ finance โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def buildFinance(*, sinceYear: int = 2021, verbose: bool = True) -> Path | None:
    """Merge every per-stock finance parquet into ``finance.parquet``.

    Each file gets a ``stockCode`` column (taken from ``stock_code`` or, when
    neither spelling exists, from the file stem) and is restricted to
    ``bsns_year >= sinceYear`` when that column is present.

    Args:
        sinceYear: Earliest business year to keep.
        verbose: When true, progress is reported through ``_log``.

    Returns:
        Path of the written parquet, or ``None`` when there is no finance
        parquet or no file contributed any row.
    """
    finDir = _financeDir()
    outDir = _scanDir()
    outDir.mkdir(parents=True, exist_ok=True)
    outputPath = outDir / "finance.parquet"

    allFiles = sorted(finDir.glob("*.parquet"))
    if not allFiles:
        if verbose:
            _log("finance parquet 없음 — 빌드 건너뜀")
        return None

    if verbose:
        _log(f"[finance] {len(allFiles)}종목, sinceYear={sinceYear}")

    # Purge batch files left by an interrupted run so they are not merged in.
    batchDir = outDir / "_tmp_finance"
    shutil.rmtree(batchDir, ignore_errors=True)
    batchDir.mkdir(parents=True, exist_ok=True)

    t0 = time.perf_counter()
    batchChunks: list[pl.DataFrame] = []
    success = 0
    totalRows = 0
    batchIdx = 0

    try:
        for i, pf in enumerate(allFiles):
            try:
                df = pl.read_parquet(str(pf))
            except (pl.exceptions.PolarsError, OSError):
                df = None  # unreadable file: skip it, but still reach the flush below

            if df is not None:
                if "stockCode" not in df.columns and "stock_code" not in df.columns:
                    df = df.with_columns(pl.lit(pf.stem).alias("stockCode"))
                elif "stock_code" in df.columns and "stockCode" not in df.columns:
                    df = df.rename({"stock_code": "stockCode"})

                if "bsns_year" in df.columns:
                    df = df.filter(
                        pl.col("bsns_year").cast(pl.Utf8).str.to_integer(strict=False) >= sinceYear
                    )

                if df.height > 0:
                    batchChunks.append(df)
                    totalRows += df.height
                    success += 1

            if len(batchChunks) >= _BATCH:
                pl.concat(batchChunks, how="diagonal_relaxed").write_parquet(
                    str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd"
                )
                batchChunks = []
                batchIdx += 1

            if verbose and (i + 1) % 500 == 0:
                _log(f" [{i+1}/{len(allFiles)}] {success}ok {totalRows:,}rows {time.perf_counter()-t0:.0f}s")

        # Flush the final partial batch after the loop. Doing it inside the
        # loop on the last index loses the buffered frames whenever the last
        # file is skipped (unreadable or empty after filtering).
        if batchChunks:
            pl.concat(batchChunks, how="diagonal_relaxed").write_parquet(
                str(batchDir / f"batch_{batchIdx:03d}.parquet"), compression="zstd"
            )
            batchChunks = []
            batchIdx += 1

        if batchIdx == 0:
            if verbose:
                _log(" finance 결과 없음")
            return None

        _mergeBatchFiles(batchDir, outputPath, how="diagonal_relaxed")
    finally:
        # Always remove the intermediate files, including on error.
        shutil.rmtree(batchDir, ignore_errors=True)

    elapsed = time.perf_counter() - t0
    diskMb = outputPath.stat().st_size / 1024 / 1024
    if verbose:
        _log(f" 완료: {success}종목, {totalRows:,}행, {diskMb:.1f}MB, {elapsed:.0f}초")

    return outputPath
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
# โโ report โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ๏ฟฝ๏ฟฝ๏ฟฝโโโโโโโโโ
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def buildReport(*, sinceYear: int = 2021, verbose: bool = True) -> list[Path]:
|
| 305 |
+
"""report โ apiType๋ณ ๋ถ๋ฆฌ parquet. ๋ฐํ: ์์ฑ๋ ํ์ผ ๊ฒฝ๋ก ๋ชฉ๋ก."""
|
| 306 |
+
repDir = _reportDir()
|
| 307 |
+
outDir = _scanDir() / "report"
|
| 308 |
+
outDir.mkdir(parents=True, exist_ok=True)
|
| 309 |
+
|
| 310 |
+
allFiles = sorted(repDir.glob("*.parquet"))
|
| 311 |
+
if not allFiles:
|
| 312 |
+
if verbose:
|
| 313 |
+
_log("report parquet ์์ โ ๋น๋ ๊ฑด๋๋")
|
| 314 |
+
return []
|
| 315 |
+
|
| 316 |
+
if verbose:
|
| 317 |
+
_log(f"[report] {len(allFiles)}์ข
๋ชฉ โ apiType๋ณ ๋ถ๋ฆฌ")
|
| 318 |
+
|
| 319 |
+
t0 = time.perf_counter()
|
| 320 |
+
|
| 321 |
+
# apiType๋ณ ๋ฐฐ์น ๋๋ ํ ๋ฆฌ
|
| 322 |
+
apiBatchDirs: dict[str, Path] = {}
|
| 323 |
+
apiBatchIdx: dict[str, int] = {}
|
| 324 |
+
apiChunks: dict[str, list[pl.DataFrame]] = {}
|
| 325 |
+
apiRows: dict[str, int] = {}
|
| 326 |
+
for at in SCAN_API_TYPES:
|
| 327 |
+
bd = outDir / f"_tmp_{at}"
|
| 328 |
+
bd.mkdir(parents=True, exist_ok=True)
|
| 329 |
+
apiBatchDirs[at] = bd
|
| 330 |
+
apiBatchIdx[at] = 0
|
| 331 |
+
apiChunks[at] = []
|
| 332 |
+
apiRows[at] = 0
|
| 333 |
+
|
| 334 |
+
processed = 0
|
| 335 |
+
|
| 336 |
+
for i, pf in enumerate(allFiles):
|
| 337 |
+
try:
|
| 338 |
+
df = pl.read_parquet(str(pf))
|
| 339 |
+
except (pl.exceptions.PolarsError, OSError):
|
| 340 |
+
continue
|
| 341 |
+
|
| 342 |
+
if "apiType" not in df.columns:
|
| 343 |
+
continue
|
| 344 |
+
|
| 345 |
+
if "stockCode" not in df.columns and "stock_code" not in df.columns:
|
| 346 |
+
df = df.with_columns(pl.lit(pf.stem).alias("stockCode"))
|
| 347 |
+
|
| 348 |
+
if "year" in df.columns:
|
| 349 |
+
df = df.with_columns(
|
| 350 |
+
pl.col("year").cast(pl.Utf8).str.to_integer(strict=False).alias("_yearInt")
|
| 351 |
+
)
|
| 352 |
+
df = df.filter(
|
| 353 |
+
pl.col("_yearInt").is_null() | (pl.col("_yearInt") >= sinceYear)
|
| 354 |
+
).drop("_yearInt")
|
| 355 |
+
|
| 356 |
+
processed += 1
|
| 357 |
+
|
| 358 |
+
for apiType in SCAN_API_TYPES:
|
| 359 |
+
sub = df.filter(pl.col("apiType") == apiType)
|
| 360 |
+
if sub.height > 0:
|
| 361 |
+
apiChunks[apiType].append(sub)
|
| 362 |
+
apiRows[apiType] += sub.height
|
| 363 |
+
|
| 364 |
+
if len(apiChunks[apiType]) >= _BATCH:
|
| 365 |
+
batch = pl.concat(apiChunks[apiType], how="diagonal_relaxed")
|
| 366 |
+
idx = apiBatchIdx[apiType]
|
| 367 |
+
batch.write_parquet(
|
| 368 |
+
str(apiBatchDirs[apiType] / f"batch_{idx:03d}.parquet"),
|
| 369 |
+
compression="zstd",
|
| 370 |
+
)
|
| 371 |
+
del batch
|
| 372 |
+
apiChunks[apiType] = []
|
| 373 |
+
apiBatchIdx[apiType] = idx + 1
|
| 374 |
+
|
| 375 |
+
if verbose and (i + 1) % 500 == 0:
|
| 376 |
+
_log(f" [{i+1}/{len(allFiles)}] {processed}ok {time.perf_counter()-t0:.0f}s")
|
| 377 |
+
|
| 378 |
+
# ๋จ์ ์ฒญํฌ flush + ํฉ์ฐ
|
| 379 |
+
outputs: list[Path] = []
|
| 380 |
+
for apiType in SCAN_API_TYPES:
|
| 381 |
+
# ๋จ์ ์ฒญํฌ ์ฐ๊ธฐ
|
| 382 |
+
if apiChunks[apiType]:
|
| 383 |
+
batch = pl.concat(apiChunks[apiType], how="diagonal_relaxed")
|
| 384 |
+
idx = apiBatchIdx[apiType]
|
| 385 |
+
batch.write_parquet(
|
| 386 |
+
str(apiBatchDirs[apiType] / f"batch_{idx:03d}.parquet"),
|
| 387 |
+
compression="zstd",
|
| 388 |
+
)
|
| 389 |
+
del batch
|
| 390 |
+
apiBatchIdx[apiType] = idx + 1
|
| 391 |
+
|
| 392 |
+
if apiBatchIdx[apiType] == 0:
|
| 393 |
+
shutil.rmtree(apiBatchDirs[apiType], ignore_errors=True)
|
| 394 |
+
continue
|
| 395 |
+
|
| 396 |
+
outPath = outDir / f"{apiType}.parquet"
|
| 397 |
+
_mergeBatchFiles(apiBatchDirs[apiType], outPath, how="diagonal_relaxed")
|
| 398 |
+
shutil.rmtree(apiBatchDirs[apiType], ignore_errors=True)
|
| 399 |
+
|
| 400 |
+
diskMb = outPath.stat().st_size / 1024 / 1024
|
| 401 |
+
outputs.append(outPath)
|
| 402 |
+
if verbose:
|
| 403 |
+
_log(f" {apiType}: {apiRows[apiType]:,}ํ, {diskMb:.1f}MB")
|
| 404 |
+
|
| 405 |
+
elapsed = time.perf_counter() - t0
|
| 406 |
+
if verbose:
|
| 407 |
+
_log(f" report ์๋ฃ: {len(outputs)}๊ฐ apiType, {elapsed:.0f}์ด")
|
| 408 |
+
|
| 409 |
+
return outputs
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
# โโ ์ ์ฒด ๋น๋ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def buildScan(*, sinceYear: int = 2021, verbose: bool = True) -> dict[str, Path | list[Path] | None]:
    """Run the full scan prebuild: changes, then finance, then report.

    Args:
        sinceYear: Forwarded unchanged to the three builders.
        verbose: When true, progress and the total size of all parquet files
            under the scan directory are reported through ``_log``.

    Returns:
        Dict with keys ``"changes"``, ``"finance"`` and ``"report"`` holding
        each builder's return value.
    """
    if verbose:
        _log(f"전종목 scan 프리빌드 시작 (sinceYear={sinceYear})")
        _log("=" * 60)

    # Dict displays evaluate in order, so the builders run changes -> finance -> report.
    results: dict[str, Path | list[Path] | None] = {
        "changes": buildChanges(sinceYear=sinceYear, verbose=verbose),
        "finance": buildFinance(sinceYear=sinceYear, verbose=verbose),
        "report": buildReport(sinceYear=sinceYear, verbose=verbose),
    }

    if verbose:
        _log("=" * 60)
        scanDir = _scanDir()
        if scanDir.exists():
            totalBytes = sum(f.stat().st_size for f in scanDir.rglob("*.parquet"))
            totalMb = totalBytes / 1024 / 1024
            _log(f"scan 전체: {totalMb:.1f}MB")

    return results
|
src/dartlab/providers/dart/_sections_source.py
CHANGED
|
@@ -6,6 +6,7 @@ raw DataFrame๋ฅผ ๊ฐ์ธ๋, ๊ฐ์ ๊ฒฝ๋ก์์ freq/semantic ํ์ํ๋ฅผ ๋ฐ
|
|
| 6 |
|
| 7 |
from __future__ import annotations
|
| 8 |
|
|
|
|
| 9 |
from typing import TYPE_CHECKING, Any
|
| 10 |
|
| 11 |
import polars as pl
|
|
@@ -13,6 +14,9 @@ import polars as pl
|
|
| 13 |
if TYPE_CHECKING:
|
| 14 |
from dartlab.providers.dart.company import Company
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
class _SectionsSource:
|
| 18 |
"""sections source-of-truth accessor.
|
|
@@ -176,6 +180,38 @@ class _SectionsSource:
|
|
| 176 |
changedOnly=changedOnly,
|
| 177 |
)
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
def __getattr__(self, name: str) -> Any:
|
| 180 |
frame = self.raw
|
| 181 |
if frame is None:
|
|
@@ -199,6 +235,120 @@ class _SectionsSource:
|
|
| 199 |
return (
|
| 200 |
"SectionsSource("
|
| 201 |
"shape="
|
| 202 |
-
f"{frame.shape}, methods=[raw, topics(), outline(), periods(), ordered(), coverage(), freq(), semanticRegistry(), semanticCollisions(), structureRegistry(), structureCollisions(), structureEvents(), structureSummary(), structureChanges()]"
|
| 203 |
")"
|
| 204 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
from __future__ import annotations
|
| 8 |
|
| 9 |
+
import re
|
| 10 |
from typing import TYPE_CHECKING, Any
|
| 11 |
|
| 12 |
import polars as pl
|
|
|
|
| 14 |
if TYPE_CHECKING:
|
| 15 |
from dartlab.providers.dart.company import Company
|
| 16 |
|
| 17 |
+
_PERIOD_RE = re.compile(r"^\d{4}$")
|
| 18 |
+
_NUM_PATTERN = r"[\d,.]+"
|
| 19 |
+
|
| 20 |
|
| 21 |
class _SectionsSource:
|
| 22 |
"""sections source-of-truth accessor.
|
|
|
|
| 180 |
changedOnly=changedOnly,
|
| 181 |
)
|
| 182 |
|
| 183 |
+
def changes(
    self,
    *,
    topic: str | None = None,
    fromPeriod: str | None = None,
    toPeriod: str | None = None,
) -> pl.DataFrame | None:
    """Extract the blocks that changed between adjacent periods (vectorised).

    Works on the wide sections frame in ``self.raw`` and delegates to
    ``_buildChanges``, which labels each change as one of: appeared,
    disappeared, numeric, structural, wording.

    Args:
        topic: When given, only rows of this topic are compared.
        fromPeriod: When given, lower bound for the earlier period of a pair.
        toPeriod: When given, upper bound for the later period of a pair.

    Returns:
        The change frame, or ``None`` when ``self.raw`` is ``None``.
    """
    sectionsFrame = self.raw
    if sectionsFrame is not None:
        return _buildChanges(sectionsFrame, topic=topic, fromPeriod=fromPeriod, toPeriod=toPeriod)
    return None
|
| 199 |
+
|
| 200 |
+
def changeSummary(self, *, topN: int = 10) -> pl.DataFrame | None:
    """Summarise changes per topic and change type (intended as AI context).

    Args:
        topN: The result is capped at ``topN * 5`` rows. Rows are ordered by
            topic name first, so the cap keeps the alphabetically first
            topics rather than the most-changed ones.

    Returns:
        A frame with columns topic, changeType, count, avgDelta, sorted by
        topic ascending then count descending. ``None`` when there is no
        change data.
    """
    changeRows = self.changes()
    if changeRows is None:
        return None
    if changeRows.is_empty():
        return None

    countExpr = pl.len().alias("count")
    avgDeltaExpr = pl.col("sizeDelta").mean().round(0).cast(pl.Int64).alias("avgDelta")

    grouped = changeRows.group_by(["topic", "changeType"]).agg(countExpr, avgDeltaExpr)
    ordered = grouped.sort(["topic", "count"], descending=[False, True])
    return ordered.head(topN * 5)
|
| 214 |
+
|
| 215 |
def __getattr__(self, name: str) -> Any:
|
| 216 |
frame = self.raw
|
| 217 |
if frame is None:
|
|
|
|
| 235 |
return (
|
| 236 |
"SectionsSource("
|
| 237 |
"shape="
|
| 238 |
+
f"{frame.shape}, methods=[raw, topics(), outline(), periods(), ordered(), coverage(), freq(), changes(), changeSummary(), semanticRegistry(), semanticCollisions(), structureRegistry(), structureCollisions(), structureEvents(), structureSummary(), structureChanges()]"
|
| 239 |
")"
|
| 240 |
)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def _buildChanges(
    sections: pl.DataFrame,
    *,
    topic: str | None = None,
    fromPeriod: str | None = None,
    toPeriod: str | None = None,
) -> pl.DataFrame:
    """Convert the wide sections frame into a frame of period-to-period changes.

    Columns whose name matches ``_PERIOD_RE`` are treated as periods. The
    frame is unpivoted to long form, each row's text is compared with the
    previous period of the same source row, and changed pairs are classified.
    (Original author's note: vectorised Polars pattern validated in
    experiment 101-010; 22,060 rows in 0.15 s, 12x faster than a Python loop.)

    Args:
        sections: Wide frame with a ``topic`` column and one column per period.
        topic: When given, restrict the comparison to this topic.
        fromPeriod: When given, keep pairs whose earlier period is >= this.
        toPeriod: When given, keep pairs whose later period is <= this.

    Returns:
        Frame with fromPeriod, toPeriod, changeType, sizeA, sizeB, preview,
        the available meta columns and sizeDelta. A column-less empty frame
        when fewer than two period columns exist, the topic has no rows, or
        nothing changed.
    """
    periodCols = sorted(name for name in sections.columns if _PERIOD_RE.match(name))
    if len(periodCols) < 2:
        return pl.DataFrame()

    optionalMeta = ("textPathKey", "blockType", "blockOrder")
    metaCols = ["topic"] + [name for name in optionalMeta if name in sections.columns]

    if topic is not None:
        sections = sections.filter(pl.col("topic") == topic)
        if sections.is_empty():
            return pl.DataFrame()

    idCols = ["_row"] + metaCols
    text = pl.col("text")
    prevText = pl.col("_prevText")
    hasText = text.is_not_null()

    # wide -> long: one row per (source row, period)
    long = (
        sections.with_row_index("_row")
        .select(idCols + periodCols)
        .unpivot(
            index=idCols,
            on=periodCols,
            variable_name="period",
            value_name="text",
        )
        .with_columns(text.cast(pl.Utf8))
    )

    # hash / length / preview, keeping null where the text itself is null
    long = long.with_columns(
        pl.when(hasText).then(text.hash()).otherwise(pl.lit(None, dtype=pl.UInt64)).alias("_hash"),
        pl.when(hasText).then(text.str.len_chars()).otherwise(pl.lit(None, dtype=pl.UInt32)).alias("_len"),
        pl.when(hasText).then(text.str.slice(0, 200)).otherwise(pl.lit(None, dtype=pl.Utf8)).alias("preview"),
    )

    # attach the previous period's values of the same source row
    long = long.sort(["_row", "period"]).with_columns(
        pl.col("period").shift(1).over("_row").alias("_prevPeriod"),
        pl.col("_hash").shift(1).over("_row").alias("_prevHash"),
        pl.col("_len").shift(1).over("_row").alias("_prevLen"),
        text.shift(1).over("_row").alias("_prevText"),
    )

    # keep pairs with a predecessor, not null on both sides, and actually different
    changed = long.filter(
        pl.col("_prevPeriod").is_not_null()
        & (text.is_not_null() | prevText.is_not_null())
        & ((pl.col("_hash") != pl.col("_prevHash")) | text.is_null() | prevText.is_null())
    )
    if changed.is_empty():
        return pl.DataFrame()

    # optional period window
    if fromPeriod is not None:
        changed = changed.filter(pl.col("_prevPeriod") >= fromPeriod)
    if toPeriod is not None:
        changed = changed.filter(pl.col("period") <= toPeriod)

    # digit runs are masked so number-only edits compare equal
    changed = changed.with_columns(
        text.str.replace_all(_NUM_PATTERN, "N").alias("_stripped"),
        prevText.str.replace_all(_NUM_PATTERN, "N").alias("_prevStripped"),
    )

    lenGap = pl.col("_len").cast(pl.Int64) - pl.col("_prevLen").cast(pl.Int64)
    relativeGap = lenGap.abs().cast(pl.Float64) / pl.col("_prevLen").cast(pl.Float64)
    changed = changed.with_columns(
        pl.when(prevText.is_null())
        .then(pl.lit("appeared"))
        .when(text.is_null())
        .then(pl.lit("disappeared"))
        .when(pl.col("_stripped") == pl.col("_prevStripped"))
        .then(pl.lit("numeric"))
        .when((pl.col("_prevLen") > 0) & (relativeGap > 0.5))
        .then(pl.lit("structural"))
        .otherwise(pl.lit("wording"))
        .alias("changeType")
    )

    # final column selection and naming
    outCols = ["_prevPeriod", "period", "changeType", "_prevLen", "_len", "preview"] + metaCols
    outNames = {"_prevPeriod": "fromPeriod", "period": "toPeriod", "_prevLen": "sizeA", "_len": "sizeB"}
    summary = changed.select(outCols).rename(outNames)
    return summary.with_columns(
        (pl.col("sizeB").cast(pl.Int64) - pl.col("sizeA").cast(pl.Int64)).alias("sizeDelta")
    )
|
src/dartlab/review/builders.py
CHANGED
|
@@ -396,21 +396,18 @@ def fundingSourcesBlock(data: dict) -> list:
|
|
| 396 |
)
|
| 397 |
blocks.append(MetricBlock(metrics))
|
| 398 |
|
| 399 |
-
# ์๊ณ์ด ํ
์ด๋ธ
|
| 400 |
history = data.get("history", [])
|
| 401 |
if len(history) >= 2:
|
| 402 |
-
|
| 403 |
for h in history:
|
| 404 |
-
|
| 405 |
-
{
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
}
|
| 412 |
-
)
|
| 413 |
-
blocks.append(TableBlock("์กฐ๋ฌ์ ๋น์ค ์ถ์ด", pl.DataFrame(histRows)))
|
| 414 |
|
| 415 |
# ๋ณด์ถฉ ์งํ (์์ฐจ์
๊ธ/EBITDA, ์๋ฌต์ ์ฐจ์
๊ธ๋ฆฌ)
|
| 416 |
suppMetrics = []
|
|
|
|
| 396 |
)
|
| 397 |
blocks.append(MetricBlock(metrics))
|
| 398 |
|
| 399 |
+
# ์๊ณ์ด ํ
์ด๋ธ (ํ=ํญ๋ชฉ, ์ด=๊ธฐ๊ฐ)
|
| 400 |
history = data.get("history", [])
|
| 401 |
if len(history) >= 2:
|
| 402 |
+
cols = {"": ["๋ด๋ถ์ ๋ณด", "์ฃผ์ฃผ์๋ณธ", "๊ธ์ต์ฐจ์
", "์์
์กฐ๋ฌ"]}
|
| 403 |
for h in history:
|
| 404 |
+
cols[h["period"]] = [
|
| 405 |
+
f"{h['retainedPct']:.0f}%",
|
| 406 |
+
f"{h['paidInPct']:.0f}%",
|
| 407 |
+
f"{h['finDebtPct']:.0f}%",
|
| 408 |
+
f"{h['opFundingPct']:.0f}%",
|
| 409 |
+
]
|
| 410 |
+
blocks.append(TableBlock("์กฐ๋ฌ์ ๋น์ค ์ถ์ด", pl.DataFrame(cols)))
|
|
|
|
|
|
|
|
|
|
| 411 |
|
| 412 |
# ๋ณด์ถฉ ์งํ (์์ฐจ์
๊ธ/EBITDA, ์๋ฌต์ ์ฐจ์
๊ธ๋ฆฌ)
|
| 413 |
suppMetrics = []
|