File size: 5,388 Bytes
4739096
 
 
 
 
 
 
564c5be
 
4739096
564c5be
 
 
 
 
4739096
564c5be
4739096
564c5be
4739096
564c5be
 
4739096
564c5be
4739096
564c5be
 
4739096
 
564c5be
4739096
564c5be
4739096
564c5be
4739096
564c5be
 
 
 
 
 
4739096
564c5be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4739096
564c5be
 
 
 
 
 
 
 
 
 
4739096
564c5be
4739096
564c5be
 
 
 
 
 
 
 
 
 
 
 
 
4739096
564c5be
4739096
 
564c5be
4739096
 
564c5be
 
 
 
4739096
 
564c5be
4739096
 
 
564c5be
 
 
 
 
 
 
4739096
 
564c5be
 
4739096
564c5be
 
 
 
 
 
4739096
564c5be
 
 
 
 
 
 
 
 
 
4739096
 
564c5be
 
4739096
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# src/router/query_router.py

import logging
import re

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class QueryRouter:
    """Keyword-based classifier that routes a query to RAG or a direct reply.

    Hybrid of two earlier variants (named "improved" and "lee" in the
    original notes):

    - "improved": an explicit out-of-scope keyword list catches questions
      that are clearly unrelated to RFPs.
    - "lee": a digit combined with a business term is treated as a
      document question even when no other RFP keyword matched.

    All matching is done on the lower-cased query. The keyword lists are
    plain instance attributes and may be adjusted after construction.
    """

    # Greeting/thanks detection only applies to queries shorter than this
    # many characters (threshold taken from the "lee" variant).
    _SHORT_QUERY_LENGTH = 25

    # Business terms that, together with a digit, indicate a document query.
    _BUSINESS_TERMS = ("์‚ฌ์—…", "๊ณผ์—…", "๊ณ„ํš", "ํ”„๋กœ์ ํŠธ", "์šฉ์—ญ")

    def __init__(self):
        """Populate the keyword lists used by :meth:`classify`."""
        # Greeting keywords
        self.greeting_keywords = [
            "์•ˆ๋…•", "hi", "hello", "๋ฐ˜๊ฐ€์›Œ", "์ฒ˜์Œ", "์ธ์‚ฌ"
        ]

        # Thanks keywords
        self.thanks_keywords = [
            "๊ณ ๋งˆ์›Œ", "๊ฐ์‚ฌ", "thanks", "๊ณ ๋ง™", "๋•กํ"
        ]

        # RFP / bidding related keywords
        self.document_keywords = [
            # Money
            "์˜ˆ์‚ฐ", "๋น„์šฉ", "๊ธˆ์•ก", "์›", "๋งŒ์›", "์–ต", "์–ต์›",
            # Schedule
            "๊ธฐํ•œ", "๋งˆ๊ฐ", "์–ธ์ œ", "๊ธฐ๊ฐ„", "๋‚ฉ๊ธฐ", "์ผ์ •",
            # Documents
            "์š”๊ตฌ์‚ฌํ•ญ", "์ œ์ถœ", "์„œ๋ฅ˜", "์–‘์‹", "ํ‰๊ฐ€", "rfp", "์ œ์•ˆ์„œ",
            # Organisation
            "๋ฐœ์ฃผ", "๊ธฐ๊ด€", "๋‹ด๋‹น์ž", "์—ฐ๋ฝ์ฒ˜", "๋ถ€์ฒ˜", "์ง€์ž์ฒด",
            # Business / contract
            "์‚ฌ์—…", "์‚ฌ์—…๋ช…", "๊ณผ์—…", "๋ฒ”์œ„", "๋ชฉ์ ", "๊ณ„์•ฝ", "์ž…์ฐฐ",
            "๊ณต๊ณ ", "ํ”„๋กœ์ ํŠธ", "์œ„ํƒ", "์šฉ์—ญ", "ํ˜‘์ƒ", "์ œ์•ˆ",
            # Policy / regulation
            "๋ฒ•", "๊ทœ์ •", "๊ธฐ์ค€", "์กฐ๊ฑด", "์ค‘์†Œ๊ธฐ์—…", "๋Œ€๊ธฐ์—…"
        ]

        # Out-of-scope keywords (taken from the "improved" variant)
        self.out_of_scope_keywords = [
            # Food
            "์ ์‹ฌ", "์ €๋…", "์•„์นจ", "์‹์‚ฌ", "๋ฐฅ", "๋ฉ”๋‰ด", "๋ง›์ง‘", "์Œ์‹", "์š”๋ฆฌ",
            # Weather / daily life
            "๋‚ ์”จ", "๊ธฐ์˜จ", "๋น„", "๋ˆˆ", "์ถ”์›Œ", "๋”์›Œ",
            # Entertainment
            "์˜ํ™”", "๋“œ๋ผ๋งˆ", "๊ฒŒ์ž„", "๋…ธ๋ž˜", "์Œ์•…", "์œ ํŠœ๋ธŒ",
            # Travel / hobbies
            "์—ฌํ–‰", "๊ด€๊ด‘", "ํœด๊ฐ€", "์ทจ๋ฏธ", "์šด๋™", "๋“ฑ์‚ฐ",
            # Finance / investment (unrelated to RFPs)
            "์ฃผ์‹", "์ฝ”์ธ", "๋น„ํŠธ์ฝ”์ธ", "ํˆฌ์ž", "ํŽ€๋“œ", "๋ถ€๋™์‚ฐ",
            # Other
            "์‚ฌ๋ž‘", "์—ฐ์• ", "๋ฐ์ดํŠธ", "์นœ๊ตฌ", "๊ฐ€์กฑ"
        ]

    def _mask_document_keywords(self, text: str) -> str:
        """Return *text* with every document keyword replaced by a space.

        Used before the out-of-scope scan so that characters belonging to a
        document keyword cannot, on their own, trigger an out-of-scope hit.
        """
        # Longest first so a keyword is removed whole before any shorter
        # keyword contained in it is considered.
        for keyword in sorted(self.document_keywords, key=len, reverse=True):
            if keyword:
                text = text.replace(keyword, " ")
        return text

    @staticmethod
    def _contains_phrase(text: str, keywords) -> bool:
        """Return True if any of *keywords* occurs in *text*.

        Keywords made only of ASCII letters must match as whole words, so
        "hi" does not fire inside "which" or "this". Every other keyword is
        matched as a plain substring.
        """
        for keyword in keywords:
            if re.fullmatch(r"[a-z]+", keyword):
                pattern = r"(?<![a-z])" + re.escape(keyword) + r"(?![a-z])"
                if re.search(pattern, text):
                    return True
            elif keyword in text:
                return True
        return False

    def classify(self, query: str) -> dict:
        """Classify *query* into one of the routing categories.

        Checks run in this order and the first match wins:

        1. out-of-scope keyword (document keywords are masked out first)
        2. thanks, then greeting, only for queries shorter than 25 characters
        3. one or more RFP/document keywords
        4. a digit together with a business term
        5. fallback: out_of_scope

        Args:
            query: The raw user query.

        Returns:
            dict: {
                'type': 'greeting' | 'thanks' | 'document' | 'out_of_scope',
                'confidence': float between 0.0 and 1.0,
                'reason': str
            }
        """
        query_lower = query.lower()

        # 1. Clearly out-of-scope queries are rejected first. The scan runs
        #    on a copy with document keywords blanked out; otherwise the
        #    one-character keyword "๋น„" would match inside "๋น„์šฉ" and
        #    misroute every cost question.
        scannable = self._mask_document_keywords(query_lower)
        for keyword in self.out_of_scope_keywords:
            if keyword in scannable:
                logger.info("๐Ÿšซ out_of_scope ๊ฐ์ง€: '%s' ํ‚ค์›Œ๋“œ", keyword)
                return {
                    'type': 'out_of_scope',
                    'confidence': 0.95,
                    'reason': f'๋น„RFP ํ‚ค์›Œ๋“œ ๊ฐ์ง€: {keyword}'
                }

        # 2. Greeting/thanks are only considered for short queries; thanks
        #    is checked before greeting.
        if len(query) < self._SHORT_QUERY_LENGTH:
            if self._contains_phrase(query_lower, self.thanks_keywords):
                logger.info("๐Ÿ™ thanks ๊ฐ์ง€")
                return {
                    'type': 'thanks',
                    'confidence': 0.90,
                    'reason': '๊ฐ์‚ฌ ์ธ์‚ฌ ๊ฐ์ง€'
                }

            if self._contains_phrase(query_lower, self.greeting_keywords):
                logger.info("๐Ÿ‘‹ greeting ๊ฐ์ง€")
                return {
                    'type': 'greeting',
                    'confidence': 0.90,
                    'reason': '์ธ์‚ฌ ๊ฐ์ง€'
                }

        # 3. RFP/document keywords: confidence grows with the number of
        #    distinct keywords matched, capped at 0.95.
        document_matches = sum(
            1 for kw in self.document_keywords if kw in query_lower
        )
        if document_matches > 0:
            confidence = min(0.70 + (document_matches * 0.05), 0.95)
            logger.info(
                "๐Ÿ“„ document ๊ฐ์ง€: %d๊ฐœ ํ‚ค์›Œ๋“œ ๋งค์นญ", document_matches
            )
            return {
                'type': 'document',
                'confidence': confidence,
                'reason': f'RFP ํ‚ค์›Œ๋“œ {document_matches}๊ฐœ ๊ฐ์ง€'
            }

        # 4. Digit + business term (logic from the "lee" variant).
        #    Four of the five business terms are also document keywords and
        #    are therefore already handled by step 3; in practice only
        #    "๊ณ„ํš" can reach this branch.
        has_number = any(ch.isdigit() for ch in query)
        has_business = any(term in query_lower for term in self._BUSINESS_TERMS)
        if has_number and has_business:
            logger.info("๐Ÿ”ข document ๊ฐ์ง€: ์ˆซ์ž + ์‚ฌ์—… ํ‚ค์›Œ๋“œ ์กฐํ•ฉ")
            return {
                'type': 'document',
                'confidence': 0.65,
                'reason': '์ˆซ์ž์™€ ์‚ฌ์—… ํ‚ค์›Œ๋“œ ๋™์‹œ ๊ฐ์ง€'
            }

        # 5. Fallback: nothing matched, treat as out of scope with low
        #    confidence (0.6, from the "improved" variant).
        logger.info("๐Ÿšซ out_of_scope (๊ธฐ๋ณธ๊ฐ’): RFP ํ‚ค์›Œ๋“œ ์—†์Œ")
        return {
            'type': 'out_of_scope',
            'confidence': 0.60,
            'reason': 'RFP ๊ด€๋ จ ํ‚ค์›Œ๋“œ ๋ฏธ๊ฐ์ง€'
        }