RFP_summary_chatbot / src /router /query_router.py
Dongjin1203's picture
쿼리 라우터 out_of_scope 키워드 추가, 숫자 사업 관련 키워드 추가, 짧은 질문 범위 수정
564c5be
raw
history blame
5.39 kB
# src/router/query_router.py
import logging
logger = logging.getLogger(__name__)
class QueryRouter:
"""
Query๋ฅผ RAG vs Direct๋กœ ๋ผ์šฐํŒ… (ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๋ฒ„์ „)
improved + lee ๋ฒ„์ „์˜ ์žฅ์  ๊ฒฐํ•ฉ:
- improved: out_of_scope ํ‚ค์›Œ๋“œ๋กœ ๋ช…ํ™•ํ•œ ๋น„RFP ์งˆ๋ฌธ ๊ฐ์ง€
- lee: ์ˆซ์ž + ์‚ฌ์—… ํ‚ค์›Œ๋“œ ์กฐํ•ฉ์œผ๋กœ ๋งฅ๋ฝ ํŒŒ์•…
"""
def __init__(self):
# ์ธ์‚ฌ ํ‚ค์›Œ๋“œ
self.greeting_keywords = [
"์•ˆ๋…•", "hi", "hello", "๋ฐ˜๊ฐ€์›Œ", "์ฒ˜์Œ", "์ธ์‚ฌ"
]
# ๊ฐ์‚ฌ ํ‚ค์›Œ๋“œ
self.thanks_keywords = [
"๊ณ ๋งˆ์›Œ", "๊ฐ์‚ฌ", "thanks", "๊ณ ๋ง™", "๋•กํ"
]
# RFP/์ž…์ฐฐ ๊ด€๋ จ ํ‚ค์›Œ๋“œ
self.document_keywords = [
# ๋ˆ ๊ด€๋ จ
"์˜ˆ์‚ฐ", "๋น„์šฉ", "๊ธˆ์•ก", "์›", "๋งŒ์›", "์–ต", "์–ต์›",
# ์ผ์ • ๊ด€๋ จ
"๊ธฐํ•œ", "๋งˆ๊ฐ", "์–ธ์ œ", "๊ธฐ๊ฐ„", "๋‚ฉ๊ธฐ", "์ผ์ •",
# ๋ฌธ์„œ ๊ด€๋ จ
"์š”๊ตฌ์‚ฌํ•ญ", "์ œ์ถœ", "์„œ๋ฅ˜", "์–‘์‹", "ํ‰๊ฐ€", "rfp", "์ œ์•ˆ์„œ",
# ์กฐ์ง ๊ด€๋ จ
"๋ฐœ์ฃผ", "๊ธฐ๊ด€", "๋‹ด๋‹น์ž", "์—ฐ๋ฝ์ฒ˜", "๋ถ€์ฒ˜", "์ง€์ž์ฒด",
# ์‚ฌ์—…/๊ณ„์•ฝ ๊ด€๋ จ
"์‚ฌ์—…", "์‚ฌ์—…๋ช…", "๊ณผ์—…", "๋ฒ”์œ„", "๋ชฉ์ ", "๊ณ„์•ฝ", "์ž…์ฐฐ",
"๊ณต๊ณ ", "ํ”„๋กœ์ ํŠธ", "์œ„ํƒ", "์šฉ์—ญ", "ํ˜‘์ƒ", "์ œ์•ˆ",
# ์ œ๋„/๊ทœ์ • ๊ด€๋ จ
"๋ฒ•", "๊ทœ์ •", "๊ธฐ์ค€", "์กฐ๊ฑด", "์ค‘์†Œ๊ธฐ์—…", "๋Œ€๊ธฐ์—…"
]
# โœ… out_of_scope ํ‚ค์›Œ๋“œ (improved ๋ฒ„์ „์—์„œ ๊ฐ€์ ธ์˜ด)
self.out_of_scope_keywords = [
# ์Œ์‹
"์ ์‹ฌ", "์ €๋…", "์•„์นจ", "์‹์‚ฌ", "๋ฐฅ", "๋ฉ”๋‰ด", "๋ง›์ง‘", "์Œ์‹", "์š”๋ฆฌ",
# ๋‚ ์”จ/์ผ์ƒ
"๋‚ ์”จ", "๊ธฐ์˜จ", "๋น„", "๋ˆˆ", "์ถ”์›Œ", "๋”์›Œ",
# ์—”ํ„ฐํ…Œ์ธ๋จผํŠธ
"์˜ํ™”", "๋“œ๋ผ๋งˆ", "๊ฒŒ์ž„", "๋…ธ๋ž˜", "์Œ์•…", "์œ ํŠœ๋ธŒ",
# ์—ฌํ–‰/์ทจ๋ฏธ
"์—ฌํ–‰", "๊ด€๊ด‘", "ํœด๊ฐ€", "์ทจ๋ฏธ", "์šด๋™", "๋“ฑ์‚ฐ",
# ๊ธˆ์œต/ํˆฌ์ž (RFP์™€ ๋ฌด๊ด€)
"์ฃผ์‹", "์ฝ”์ธ", "๋น„ํŠธ์ฝ”์ธ", "ํˆฌ์ž", "ํŽ€๋“œ", "๋ถ€๋™์‚ฐ",
# ๊ธฐํƒ€
"์‚ฌ๋ž‘", "์—ฐ์• ", "๋ฐ์ดํŠธ", "์นœ๊ตฌ", "๊ฐ€์กฑ"
]
def classify(self, query: str) -> dict:
"""
์ฟผ๋ฆฌ ๋ถ„๋ฅ˜
Returns:
dict: {
'type': 'greeting' | 'thanks' | 'document' | 'out_of_scope',
'confidence': 0.0~1.0,
'reason': str
}
"""
query_lower = query.lower()
query_length = len(query)
# โœ… 1. ๋ช…ํ™•ํ•œ out_of_scope ๋จผ์ € ์ฒดํฌ (improved ๋กœ์ง)
for keyword in self.out_of_scope_keywords:
if keyword in query_lower:
logger.info(f"๐Ÿšซ out_of_scope ๊ฐ์ง€: '{keyword}' ํ‚ค์›Œ๋“œ")
return {
'type': 'out_of_scope',
'confidence': 0.95,
'reason': f'๋น„RFP ํ‚ค์›Œ๋“œ ๊ฐ์ง€: {keyword}'
}
# 2. ์งง์€ ์งˆ๋ฌธ์ผ ๋•Œ๋งŒ ์ธ์‚ฌ/๊ฐ์‚ฌ ์ฒดํฌ (lee์˜ 25์ž ๊ธฐ์ค€ ์‚ฌ์šฉ)
if query_length < 25:
# ๊ฐ์‚ฌ
if any(kw in query_lower for kw in self.thanks_keywords):
logger.info(f"๐Ÿ™ thanks ๊ฐ์ง€")
return {
'type': 'thanks',
'confidence': 0.90,
'reason': '๊ฐ์‚ฌ ์ธ์‚ฌ ๊ฐ์ง€'
}
# ์ธ์‚ฌ
if any(kw in query_lower for kw in self.greeting_keywords):
logger.info(f"๐Ÿ‘‹ greeting ๊ฐ์ง€")
return {
'type': 'greeting',
'confidence': 0.90,
'reason': '์ธ์‚ฌ ๊ฐ์ง€'
}
# 3. RFP/๋ฌธ์„œ ๊ด€๋ จ ํ‚ค์›Œ๋“œ ์ฒดํฌ (๋™์  ์‹ ๋ขฐ๋„)
document_matches = sum(1 for kw in self.document_keywords if kw in query_lower)
if document_matches > 0:
# ๋งค์นญ๋œ ํ‚ค์›Œ๋“œ ์ˆ˜์— ๋”ฐ๋ผ ์‹ ๋ขฐ๋„ ์กฐ์ •
confidence = min(0.70 + (document_matches * 0.05), 0.95)
logger.info(f"๐Ÿ“„ document ๊ฐ์ง€: {document_matches}๊ฐœ ํ‚ค์›Œ๋“œ ๋งค์นญ")
return {
'type': 'document',
'confidence': confidence,
'reason': f'RFP ํ‚ค์›Œ๋“œ {document_matches}๊ฐœ ๊ฐ์ง€'
}
# โœ… 4. ์ˆซ์ž + ์‚ฌ์—… ํ‚ค์›Œ๋“œ ์กฐํ•ฉ ์ฒดํฌ (lee ๋กœ์ง)
# "12๊ฐœ์›” ์‚ฌ์—…", "5์–ต์› ํ”„๋กœ์ ํŠธ" ๊ฐ™์€ ๋งฅ๋ฝ ํŒŒ์•…
has_number = any(ch.isdigit() for ch in query)
business_terms = ["์‚ฌ์—…", "๊ณผ์—…", "๊ณ„ํš", "ํ”„๋กœ์ ํŠธ", "์šฉ์—ญ"]
has_business = any(term in query_lower for term in business_terms)
if has_number and has_business:
logger.info(f"๐Ÿ”ข document ๊ฐ์ง€: ์ˆซ์ž + ์‚ฌ์—… ํ‚ค์›Œ๋“œ ์กฐํ•ฉ")
return {
'type': 'document',
'confidence': 0.65,
'reason': '์ˆซ์ž์™€ ์‚ฌ์—… ํ‚ค์›Œ๋“œ ๋™์‹œ ๊ฐ์ง€'
}
# 5. ๊ธฐ๋ณธ๊ฐ’: out_of_scope (improved์˜ 0.6 ์‚ฌ์šฉ)
logger.info(f"๐Ÿšซ out_of_scope (๊ธฐ๋ณธ๊ฐ’): RFP ํ‚ค์›Œ๋“œ ์—†์Œ")
return {
'type': 'out_of_scope',
'confidence': 0.60,
'reason': 'RFP ๊ด€๋ จ ํ‚ค์›Œ๋“œ ๋ฏธ๊ฐ์ง€'
}