"""Context-aware query builder for search-oriented tools.

Issue #159 follow-up: build query variants for RAG/API lookups that
reflect the original user question, previous-turn context, and a short
summary of recent tool runs.
"""
| from __future__ import annotations | |
| import re | |
| from typing import TYPE_CHECKING, Any, Dict, Sequence | |
| if TYPE_CHECKING: | |
| from .session_context import SessionContext | |
| _FOLLOW_UP_PATTERNS = tuple( | |
| re.compile(pattern) | |
| for pattern in ( | |
| r"๊ทผ๊ฑฐ", | |
| r"์ถ์ฒ", | |
| r"๋งํฌ", | |
| r"์ด์ ", | |
| r"๋ณด๊ฐ", | |
| r"์ถ๊ฐ", | |
| r"๋ค์", | |
| r"์์ ", | |
| r"์ ์ค", | |
| r"๊ณต์", | |
| r"์ด ๋ต๋ณ", | |
| r"์ ๋ต๋ณ", | |
| r"๊ธฐ์กด ๋ต๋ณ", | |
| ) | |
| ) | |
| _MAX_QUERY_LEN = 480 | |
| _MAX_USER_LEN = 180 | |
| _MAX_ASSISTANT_LEN = 220 | |
| _MAX_TOOL_SUMMARY_LEN = 120 | |
| # ์ง์๋๋ช ์ฌ/์ฐธ์กฐ ํํ: ์ด์ turn์ ๊ฐ๋ฆฌํค๋ ํํ์ด ์์ผ๋ฉด follow-up์ผ๋ก ๊ฐ์ฃผ | |
| _ANAPHORA_PATTERNS = tuple( | |
| re.compile(pattern) | |
| for pattern in ( | |
| r"๊ทธ๊ฑฐ", | |
| r"์ด๊ฑฐ", | |
| r"์ ๊ฑฐ", | |
| r"์ด๊ฒ", | |
| r"๊ทธ๊ฒ", | |
| r"์ ๊ฒ", | |
| r"์\s*(๋ต๋ณ|๋ด์ฉ|๊ธ|์ค๋ช |ํญ๋ชฉ)", | |
| r"์๋\s*(๋ต๋ณ|๋ด์ฉ|๊ธ|์ค๋ช |ํญ๋ชฉ)", | |
| r"์ด\s*(๋ต๋ณ|๋ด์ฉ|๊ธ|์ค๋ช )", | |
| r"๊ทธ\s*(๋ต๋ณ|๋ด์ฉ|๊ธ|์ค๋ช )", | |
| r"๊ธฐ์กด\s*(๋ต๋ณ|๋ด์ฉ|๊ธ|์ด์)", | |
| r"๋ฐฉ๊ธ", | |
| r"์์", | |
| r"์์์", | |
| ) | |
| ) | |
| # ์๊ธฐ์๊ฒฐ ํ์ : ์ฟผ๋ฆฌ์ ๋ ๋ฆฝ ๋ช ์ฌ๊ฐ ์ด ์ ์ด์์ด๋ฉด ์๊ธฐ์๊ฒฐ์ ์ผ๋ก ๊ฐ์ฃผ | |
| _SELF_CONTAINED_NOUN_MIN = 2 | |
def build_runtime_query_context(session: "SessionContext", current_query: str) -> Dict[str, Any]:
    """Extract the structured context the query builder consumes for a session.

    Returns a dict with the session id, the normalized current query, the
    session's own context summary, the clipped previous user/assistant turns,
    and a clipped summary of recent tool runs.
    """
    prev_user, prev_assistant = extract_previous_turns(session, current_query)
    tool_summary = build_recent_tool_summary(session)
    return {
        "session_id": session.session_id,
        "query": normalize_text(current_query),
        "session_context": session.build_context_summary(),
        "previous_user_query": clip_text(prev_user, _MAX_USER_LEN),
        "previous_assistant_response": clip_text(prev_assistant, _MAX_ASSISTANT_LEN),
        "recent_tool_summary": clip_text(tool_summary, _MAX_TOOL_SUMMARY_LEN),
    }
def extract_previous_turns(
    session: "SessionContext",
    current_query: str,
) -> tuple[str, str]:
    """Return the latest (user, assistant) turn texts preceding this request.

    If the most recent history entry is the current request itself (same
    normalized text, role "user"), it is skipped so we look at the turn
    before it. Missing turns come back as empty strings.
    """
    history = list(session.recent_history)
    current = normalize_text(current_query)
    # Drop the trailing user turn when it duplicates the current request.
    if history and history[-1].role == "user" and normalize_text(history[-1].content) == current:
        history.pop()

    # Walk backwards once, remembering the first occurrence of each role.
    latest: dict[str, str] = {}
    for turn in reversed(history):
        if turn.role in ("user", "assistant") and turn.role not in latest:
            latest[turn.role] = normalize_text(turn.content)
            if len(latest) == 2:
                break
    return latest.get("user", ""), latest.get("assistant", "")
def build_recent_tool_summary(session: "SessionContext") -> str:
    """Condense the last three tool runs into a short single-line summary.

    Each run contributes its tool name, one clipped text hint (the recorded
    query, or the text preview as a fallback), and a "count N" marker when
    the metadata carries a count. Runs are joined with " | ".
    """
    summaries: list[str] = []
    for record in session.recent_tool_runs[-3:]:
        pieces: list[str] = [record.tool]
        metadata = record.metadata
        if isinstance(metadata, dict):
            query = normalize_text(metadata.get("query", ""))
            preview = normalize_text(metadata.get("text_preview", ""))
            # Prefer the explicit query; fall back to the text preview.
            hint = query or preview
            if hint:
                pieces.append(clip_text(hint, 60))
            count = metadata.get("count")
            if count is not None:
                pieces.append(f"count {count}")
        summaries.append(" ".join(piece for piece in pieces if piece))
    return " | ".join(summaries)
def is_self_contained_query(query: str) -> bool:
    """Decide whether the query is understandable without prior context.

    A query is self-contained when it has no anaphoric/reference expression
    and carries enough standalone tokens (whitespace-separated tokens of
    length >= 2) to stand on its own.
    """
    for pattern in _ANAPHORA_PATTERNS:
        if pattern.search(query):
            return False
    substantial = sum(1 for token in query.split() if len(token) >= 2)
    return substantial >= _SELF_CONTAINED_NOUN_MIN
def should_use_follow_up_context(
    query: str,
    *,
    tool_names: Sequence[str],
    previous_user: str,
    previous_assistant: str,
) -> bool:
    """Decide whether previous user/assistant turns belong in the query.

    Returns True only for genuine follow-up questions: there must be some
    prior turn available, the query must not be self-contained, and it must
    match one of the follow-up cue patterns.

    NOTE(review): ``tool_names`` is not used in this implementation — it
    appears to be kept for interface compatibility; confirm with callers.
    """
    if not previous_user and not previous_assistant:
        return False
    # Self-contained queries get no prior-context injection.
    if is_self_contained_query(query):
        return False
    for pattern in _FOLLOW_UP_PATTERNS:
        if pattern.search(query):
            return True
    return False
def clip_text(value: Any, limit: int) -> str:
    """Normalize *value* and clip it to at most *limit* characters.

    Text longer than *limit* is truncated and suffixed with "..." so the
    result never exceeds *limit*. Fix: the previous version returned "..."
    (3 characters) even when ``limit < 3``, exceeding the requested limit;
    such tiny limits now hard-truncate without an ellipsis.
    """
    text = normalize_text(value)
    if len(text) <= limit:
        return text
    if limit < 3:
        # No room for the "..." marker; hard-truncate (never negative slice).
        return text[: max(limit, 0)]
    return text[: limit - 3].rstrip() + "..."
def normalize_text(value: Any) -> str:
    """Coerce *value* to a single-line string with collapsed whitespace.

    Any run of whitespace (including newlines/tabs) becomes one space, and
    the result is stripped. Falsy inputs (None, "", 0, ...) normalize to "".
    """
    raw = value or ""
    collapsed = re.sub(r"\s+", " ", str(raw))
    return collapsed.strip()