Spaces:
Paused
Paused
"""keyword_analyzer capability — combines core keywords with related words.

Issue #488: civil-complaint (minwon) keyword analysis tool.
Combines two APIs (core keywords, related words) to provide
keyword analysis results.
"""
| from __future__ import annotations | |
| import asyncio | |
| from typing import Any, Dict, List, Optional | |
| from loguru import logger | |
| from .base import ( | |
| CapabilityBase, | |
| CapabilityMetadata, | |
| EvidenceEnvelope, | |
| EvidenceItem, | |
| LookupResult, | |
| ) | |
| from .defaults import get_timeout | |
class KeywordAnalyzerCapability(CapabilityBase):
    """Keyword analysis capability.

    Combines two calls made through the injected action object,
    ``get_core_keywords`` and ``get_related_words``, into a single
    ``LookupResult``.

    NOTE(review): the non-ASCII string literals in this class look
    mis-encoded (UTF-8 text decoded with a single-byte code page). They are
    reproduced unchanged here; restore them from the original source rather
    than by guesswork.

    Parameters
    ----------
    action : Optional[MinwonAnalysisAction]
        Action instance used for the API calls. It must expose the awaitable
        methods ``get_core_keywords`` and ``get_related_words``. When
        ``None``, ``execute`` returns an empty result without calling any API.
    """

    # Default number of keywords requested; mirrors the schema default below.
    _DEFAULT_RESULT_COUNT = 5
    # Maximum number of entries per section in the text summary.
    _SUMMARY_LIMIT = 5

    def __init__(self, action: Optional[Any] = None) -> None:
        """Store the action object used for the API calls (may be ``None``)."""
        self._action = action

    @property
    def metadata(self) -> CapabilityMetadata:
        """Describe this capability: name, provider, timeout and parameters.

        Exposed as a property because ``execute`` reads
        ``self.metadata.provider`` and ``self.metadata.timeout_sec`` as
        attributes. A new ``CapabilityMetadata`` is built on every access.
        """
        return CapabilityMetadata(
            name="keyword_analyzer",
            description=(
                "ํต์ฌํค์๋์ ์ฐ๊ด์ด๋ฅผ ์กฐํฉํ์ฌ "
                "๋ฏผ์ ํค์๋ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ์ ๊ณตํฉ๋๋ค."
            ),
            approval_summary="๊ณต๊ณต๋ฐ์ดํฐํฌํธ์์ ๋ฏผ์ ํค์๋๋ฅผ ๋ถ์ํฉ๋๋ค.",
            provider="data.go.kr",
            timeout_sec=get_timeout("keyword_analyzer"),
            parameters={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "ํค์๋ ๋ถ์ ๋์ ์ง์๋ฌธ",
                    },
                    "date_from": {
                        "type": "string",
                        "description": "๋ถ์ ์์์ผ (YYYYMMDD ํ์, 8์๋ฆฌ ํ์). ์: '20260101'",
                    },
                    "date_to": {
                        "type": "string",
                        "description": "๋ถ์ ์ข ๋ฃ์ผ (YYYYMMDD ํ์, 8์๋ฆฌ ํ์). ์: '20260408'",
                    },
                    "searchword": {
                        "type": "string",
                        "description": "์ฐ๊ด์ด ๋ถ์ ์ ๊ธฐ์ค ํค์๋. ์ฐ๊ด์ด ๋ถ์์๋ ํ์",
                    },
                    "result_count": {
                        "type": "integer",
                        "description": "๋ฐํํ ํค์๋ ์ (๊ธฐ๋ณธ๊ฐ 5)",
                        "default": 5,
                    },
                },
                "required": ["query", "date_from", "date_to"],
            },
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Call the core-keyword and related-word APIs and merge the results.

        Parameters
        ----------
        query : str
            Query text; a blank value yields a ``validation_error`` result.
        context : Dict[str, Any]
            Read for ``date_from``, ``date_to``, ``searchword`` and
            ``result_count``.
        session : Any
            Not used by this method.

        Returns
        -------
        LookupResult
            ``success=False`` for a blank query, a timeout, an unexpected
            exception, or when nothing was collected and at least one API is
            reported as failed. Otherwise ``success=True``; the evidence
            status is ``"ok"``, ``"partial"`` (some results plus errors) or
            ``"empty"``.
        """
        # Build the metadata once instead of on every attribute access.
        meta = self.metadata
        provider = meta.provider
        timeout_sec = meta.timeout_sec

        if not query or not query.strip():
            empty_query_msg = "query๊ฐ ๋น์ด์์ต๋๋ค"
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=empty_query_msg,
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(status="error", errors=[empty_query_msg]),
            )

        if self._action is None:
            logger.debug("[keyword_analyzer] action์ด None - ๋น ๊ฒฐ๊ณผ ๋ฐํ")
            return LookupResult(
                success=True,
                query=query,
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        date_from = context.get("date_from", "")
        date_to = context.get("date_to", "")
        searchword = context.get("searchword", "")
        # A malformed count must not crash the capability before any API call.
        result_count = self._coerce_result_count(
            context.get("result_count", self._DEFAULT_RESULT_COUNT)
        )

        try:
            core_kw, related = await asyncio.wait_for(
                self._fetch_all(date_from, date_to, searchword, result_count),
                timeout=timeout_sec,
            )
        except asyncio.TimeoutError:
            msg = f"API ํธ์ถ ํ์์์ ({timeout_sec}์ด ์ด๊ณผ)"
            logger.warning(f"[keyword_analyzer] {msg}")
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[msg]),
            )
        except Exception as exc:
            # loguru has no ``exc_info`` option: extra kwargs make it run
            # str.format() on the message, which raises when the exception
            # text contains braces. ``logger.exception`` attaches the
            # traceback, and the lazy ``{}`` placeholder keeps it brace-safe.
            logger.exception("[keyword_analyzer] API ํธ์ถ ์ค๋ฅ: {}", exc)
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[str(exc)]),
            )

        all_results: List[Dict[str, Any]] = []
        evidence_items: List[EvidenceItem] = []
        errors: List[str] = []

        # NOTE(review): when date_from/date_to is missing no API is called,
        # yet core_kw is None and the outcome is reported as a core-keyword
        # API failure. Confirm whether that should be a validation error.
        if core_kw is not None:
            rows, evidence = self._tag_items(
                core_kw, "core_keyword", "ํต์ฌํค์๋", provider
            )
            all_results.extend(rows)
            evidence_items.extend(evidence)
        else:
            errors.append("ํต์ฌํค์๋ API ์คํจ")

        if related is not None:
            rows, evidence = self._tag_items(
                related, "related_word", "์ฐ๊ด์ด", provider
            )
            all_results.extend(rows)
            evidence_items.extend(evidence)
        elif searchword:
            # Without a searchword the related-word call is skipped on
            # purpose, so a None result only counts as a failure here.
            errors.append("์ฐ๊ด์ด API ์คํจ")

        if not all_results:
            return LookupResult(
                success=not errors,
                query=query,
                provider=provider,
                empty_reason="provider_error" if errors else "no_match",
                error="; ".join(errors) if errors else None,
                evidence=EvidenceEnvelope(
                    items=[],
                    status="error" if errors else "empty",
                    errors=errors,
                ),
            )

        context_text = self._build_context_text(core_kw, related)
        return LookupResult(
            success=True,
            query=query,
            results=all_results,
            context_text=context_text,
            provider=provider,
            evidence=EvidenceEnvelope(
                items=evidence_items,
                summary_text=context_text,
                status="partial" if errors else "ok",
                errors=errors,
            ),
        )

    async def _fetch_all(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        result_count: int,
    ) -> tuple:
        """Run the core-keyword and related-word calls concurrently.

        Returns a 2-tuple ``(core_keywords, related_words)``. Each element is
        ``None`` when its call was skipped or failed (see ``_safe_call``).
        """
        has_range = bool(date_from and date_to)

        # Core keywords are requested whenever both dates are present.
        if has_range:
            core_call = self._safe_call(
                self._action.get_core_keywords,
                date_from=date_from,
                date_to=date_to,
                result_count=result_count,
            )
        else:
            core_call = self._noop()

        # Related words additionally require a searchword.
        if has_range and searchword:
            related_call = self._safe_call(
                self._action.get_related_words,
                date_from=date_from,
                date_to=date_to,
                searchword=searchword,
                result_count=result_count,
            )
        else:
            related_call = self._noop()

        return tuple(await asyncio.gather(core_call, related_call))

    @staticmethod
    async def _safe_call(fn, **kwargs) -> Optional[List[Dict[str, Any]]]:
        """Await ``fn(**kwargs)``; log and return ``None`` if it raises.

        Deliberately best-effort, so that one failing API does not discard
        the result of the other one.
        """
        try:
            return await fn(**kwargs)
        except Exception as exc:
            logger.warning(f"[keyword_analyzer] ๊ฐ๋ณ API ์คํจ: {exc}")
            return None

    @staticmethod
    async def _noop() -> None:
        """Coroutine placeholder for a skipped API call; returns ``None``."""
        return None

    @classmethod
    def _coerce_result_count(cls, raw: Any) -> int:
        """Convert ``raw`` to ``int``, using the default when it is invalid."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            logger.warning(
                "[keyword_analyzer] invalid result_count {!r}; using default {}",
                raw,
                cls._DEFAULT_RESULT_COUNT,
            )
            return cls._DEFAULT_RESULT_COUNT

    @staticmethod
    def _tag_items(
        items: List[Dict[str, Any]],
        source_api: str,
        excerpt_prefix: str,
        provider: str,
    ) -> tuple:
        """Tag API rows with their source and build matching evidence items.

        Returns ``(rows, evidence)``. Rows are shallow copies carrying an
        extra ``_source_api`` key, so the provider's own dicts are never
        mutated.
        """
        rows: List[Dict[str, Any]] = []
        evidence: List[EvidenceItem] = []
        for item in items:
            row = {**item, "_source_api": source_api}
            label = row.get("label", "")
            rows.append(row)
            evidence.append(
                EvidenceItem(
                    source_type="api",
                    title=label,
                    excerpt=f"{excerpt_prefix}: {label}, ์ ์={row.get('value', 0)}",
                    provider_meta={"provider": provider, "api": source_api},
                )
            )
        return rows, evidence

    @staticmethod
    def _format_entries(entries: List, number_spec: str, unit: str, limit: int) -> List[str]:
        """Render up to ``limit`` entries as ``label(value+unit)`` strings.

        Values that cannot be converted to ``float`` are shown verbatim and
        without the unit.
        """
        rendered: List[str] = []
        for entry in entries[:limit]:
            label = entry.get("label", "")
            value = entry.get("value", 0)
            try:
                number = format(float(value), number_spec)
            except (ValueError, TypeError):
                rendered.append(f"{label}({value})")
            else:
                rendered.append(f"{label}({number}{unit})")
        return rendered

    @staticmethod
    def _build_context_text(
        core_kw: Optional[List],
        related: Optional[List],
    ) -> str:
        """Build a one-line text summary of the combined results.

        Core keywords are formatted with no decimals and related words with
        one decimal. An empty string is returned when both inputs are empty
        or ``None``.
        """
        limit = KeywordAnalyzerCapability._SUMMARY_LIMIT
        render = KeywordAnalyzerCapability._format_entries
        parts: List[str] = []

        if core_kw:
            items = render(core_kw, ",.0f", "๊ฑด", limit)
            if items:
                parts.append(f"ํต์ฌ ํค์๋: {', '.join(items)}")

        if related:
            items = render(related, ",.1f", "์ ", limit)
            if items:
                parts.append(f"์ฐ๊ด์ด: {', '.join(items)}")

        # Joining an empty list already yields "".
        return ", ".join(parts)