쿼리 라우터 out_of_scope 키워드 추가, 숫자 사업
관련 키워드 추가, 짧은 질문 범위 수정
564c5be
| # src/router/query_router.py | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| class QueryRouter: | |
| """ | |
| Query๋ฅผ RAG vs Direct๋ก ๋ผ์ฐํ (ํ์ด๋ธ๋ฆฌ๋ ๋ฒ์ ) | |
| improved + lee ๋ฒ์ ์ ์ฅ์ ๊ฒฐํฉ: | |
| - improved: out_of_scope ํค์๋๋ก ๋ช ํํ ๋นRFP ์ง๋ฌธ ๊ฐ์ง | |
| - lee: ์ซ์ + ์ฌ์ ํค์๋ ์กฐํฉ์ผ๋ก ๋งฅ๋ฝ ํ์ | |
| """ | |
| def __init__(self): | |
| # ์ธ์ฌ ํค์๋ | |
| self.greeting_keywords = [ | |
| "์๋ ", "hi", "hello", "๋ฐ๊ฐ์", "์ฒ์", "์ธ์ฌ" | |
| ] | |
| # ๊ฐ์ฌ ํค์๋ | |
| self.thanks_keywords = [ | |
| "๊ณ ๋ง์", "๊ฐ์ฌ", "thanks", "๊ณ ๋ง", "๋กํ" | |
| ] | |
| # RFP/์ ์ฐฐ ๊ด๋ จ ํค์๋ | |
| self.document_keywords = [ | |
| # ๋ ๊ด๋ จ | |
| "์์ฐ", "๋น์ฉ", "๊ธ์ก", "์", "๋ง์", "์ต", "์ต์", | |
| # ์ผ์ ๊ด๋ จ | |
| "๊ธฐํ", "๋ง๊ฐ", "์ธ์ ", "๊ธฐ๊ฐ", "๋ฉ๊ธฐ", "์ผ์ ", | |
| # ๋ฌธ์ ๊ด๋ จ | |
| "์๊ตฌ์ฌํญ", "์ ์ถ", "์๋ฅ", "์์", "ํ๊ฐ", "rfp", "์ ์์", | |
| # ์กฐ์ง ๊ด๋ จ | |
| "๋ฐ์ฃผ", "๊ธฐ๊ด", "๋ด๋น์", "์ฐ๋ฝ์ฒ", "๋ถ์ฒ", "์ง์์ฒด", | |
| # ์ฌ์ /๊ณ์ฝ ๊ด๋ จ | |
| "์ฌ์ ", "์ฌ์ ๋ช ", "๊ณผ์ ", "๋ฒ์", "๋ชฉ์ ", "๊ณ์ฝ", "์ ์ฐฐ", | |
| "๊ณต๊ณ ", "ํ๋ก์ ํธ", "์ํ", "์ฉ์ญ", "ํ์", "์ ์", | |
| # ์ ๋/๊ท์ ๊ด๋ จ | |
| "๋ฒ", "๊ท์ ", "๊ธฐ์ค", "์กฐ๊ฑด", "์ค์๊ธฐ์ ", "๋๊ธฐ์ " | |
| ] | |
| # โ out_of_scope ํค์๋ (improved ๋ฒ์ ์์ ๊ฐ์ ธ์ด) | |
| self.out_of_scope_keywords = [ | |
| # ์์ | |
| "์ ์ฌ", "์ ๋ ", "์์นจ", "์์ฌ", "๋ฐฅ", "๋ฉ๋ด", "๋ง์ง", "์์", "์๋ฆฌ", | |
| # ๋ ์จ/์ผ์ | |
| "๋ ์จ", "๊ธฐ์จ", "๋น", "๋", "์ถ์", "๋์", | |
| # ์ํฐํ ์ธ๋จผํธ | |
| "์ํ", "๋๋ผ๋ง", "๊ฒ์", "๋ ธ๋", "์์ ", "์ ํ๋ธ", | |
| # ์ฌํ/์ทจ๋ฏธ | |
| "์ฌํ", "๊ด๊ด", "ํด๊ฐ", "์ทจ๋ฏธ", "์ด๋", "๋ฑ์ฐ", | |
| # ๊ธ์ต/ํฌ์ (RFP์ ๋ฌด๊ด) | |
| "์ฃผ์", "์ฝ์ธ", "๋นํธ์ฝ์ธ", "ํฌ์", "ํ๋", "๋ถ๋์ฐ", | |
| # ๊ธฐํ | |
| "์ฌ๋", "์ฐ์ ", "๋ฐ์ดํธ", "์น๊ตฌ", "๊ฐ์กฑ" | |
| ] | |
| def classify(self, query: str) -> dict: | |
| """ | |
| ์ฟผ๋ฆฌ ๋ถ๋ฅ | |
| Returns: | |
| dict: { | |
| 'type': 'greeting' | 'thanks' | 'document' | 'out_of_scope', | |
| 'confidence': 0.0~1.0, | |
| 'reason': str | |
| } | |
| """ | |
| query_lower = query.lower() | |
| query_length = len(query) | |
| # โ 1. ๋ช ํํ out_of_scope ๋จผ์ ์ฒดํฌ (improved ๋ก์ง) | |
| for keyword in self.out_of_scope_keywords: | |
| if keyword in query_lower: | |
| logger.info(f"๐ซ out_of_scope ๊ฐ์ง: '{keyword}' ํค์๋") | |
| return { | |
| 'type': 'out_of_scope', | |
| 'confidence': 0.95, | |
| 'reason': f'๋นRFP ํค์๋ ๊ฐ์ง: {keyword}' | |
| } | |
| # 2. ์งง์ ์ง๋ฌธ์ผ ๋๋ง ์ธ์ฌ/๊ฐ์ฌ ์ฒดํฌ (lee์ 25์ ๊ธฐ์ค ์ฌ์ฉ) | |
| if query_length < 25: | |
| # ๊ฐ์ฌ | |
| if any(kw in query_lower for kw in self.thanks_keywords): | |
| logger.info(f"๐ thanks ๊ฐ์ง") | |
| return { | |
| 'type': 'thanks', | |
| 'confidence': 0.90, | |
| 'reason': '๊ฐ์ฌ ์ธ์ฌ ๊ฐ์ง' | |
| } | |
| # ์ธ์ฌ | |
| if any(kw in query_lower for kw in self.greeting_keywords): | |
| logger.info(f"๐ greeting ๊ฐ์ง") | |
| return { | |
| 'type': 'greeting', | |
| 'confidence': 0.90, | |
| 'reason': '์ธ์ฌ ๊ฐ์ง' | |
| } | |
| # 3. RFP/๋ฌธ์ ๊ด๋ จ ํค์๋ ์ฒดํฌ (๋์ ์ ๋ขฐ๋) | |
| document_matches = sum(1 for kw in self.document_keywords if kw in query_lower) | |
| if document_matches > 0: | |
| # ๋งค์นญ๋ ํค์๋ ์์ ๋ฐ๋ผ ์ ๋ขฐ๋ ์กฐ์ | |
| confidence = min(0.70 + (document_matches * 0.05), 0.95) | |
| logger.info(f"๐ document ๊ฐ์ง: {document_matches}๊ฐ ํค์๋ ๋งค์นญ") | |
| return { | |
| 'type': 'document', | |
| 'confidence': confidence, | |
| 'reason': f'RFP ํค์๋ {document_matches}๊ฐ ๊ฐ์ง' | |
| } | |
| # โ 4. ์ซ์ + ์ฌ์ ํค์๋ ์กฐํฉ ์ฒดํฌ (lee ๋ก์ง) | |
| # "12๊ฐ์ ์ฌ์ ", "5์ต์ ํ๋ก์ ํธ" ๊ฐ์ ๋งฅ๋ฝ ํ์ | |
| has_number = any(ch.isdigit() for ch in query) | |
| business_terms = ["์ฌ์ ", "๊ณผ์ ", "๊ณํ", "ํ๋ก์ ํธ", "์ฉ์ญ"] | |
| has_business = any(term in query_lower for term in business_terms) | |
| if has_number and has_business: | |
| logger.info(f"๐ข document ๊ฐ์ง: ์ซ์ + ์ฌ์ ํค์๋ ์กฐํฉ") | |
| return { | |
| 'type': 'document', | |
| 'confidence': 0.65, | |
| 'reason': '์ซ์์ ์ฌ์ ํค์๋ ๋์ ๊ฐ์ง' | |
| } | |
| # 5. ๊ธฐ๋ณธ๊ฐ: out_of_scope (improved์ 0.6 ์ฌ์ฉ) | |
| logger.info(f"๐ซ out_of_scope (๊ธฐ๋ณธ๊ฐ): RFP ํค์๋ ์์") | |
| return { | |
| 'type': 'out_of_scope', | |
| 'confidence': 0.60, | |
| 'reason': 'RFP ๊ด๋ จ ํค์๋ ๋ฏธ๊ฐ์ง' | |
| } |