|
|
""" |
|
|
Clarification Module for Echolalia Assistant |
|
|
반향어 분석을 위한 명확화 질문 생성 모듈 |
|
|
""" |
|
|
import re |
|
|
import json |
|
|
import logging |
|
|
from typing import Dict, Any, List, Tuple, Optional |
|
|
from dataclasses import dataclass |
|
|
from enum import Enum |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class AmbiguityType(Enum):
    """Kinds of information whose absence makes an echolalia query ambiguous.

    The string values double as dictionary keys: the detector in this file
    uses them both for its pattern table and for looking up already-known
    information.
    """

    CONTEXT_SITUATION = "context_situation"
    PREVIOUS_UTTERANCE = "previous_utterance"
    CHILD_AGE = "child_age"
    VOCABULARY_LEVEL = "vocabulary_level"
    EMOTIONAL_STATE = "emotional_state"
    GENERAL = "general"
|
|
|
|
|
|
|
|
@dataclass
class AmbiguityResult:
    """Outcome of an ambiguity-detection pass over a query."""

    # Whether the query is considered ambiguous enough to need clarification.
    is_ambiguous: bool
    # Numeric ambiguity estimate; higher means more information is missing.
    ambiguity_score: float
    # Facets judged to be missing from the query.
    missing_facets: List[AmbiguityType]
    # Human-readable explanation of the verdict.
    reason: str
|
|
|
|
|
|
|
|
@dataclass
class ClarifyingQuestion:
    """A single clarifying question to present to the user."""

    # Facet this question is trying to fill in.
    question_type: AmbiguityType
    # Text shown to the user.
    question_text: str
    # Suggested answers, or None when no preset choices exist.
    options: Optional[List[str]] = None
    # True when the question is meant to be answered with free text.
    is_open_ended: bool = False
|
|
|
|
|
|
|
|
class AmbiguityDetector:
    """Detect which facets of context an echolalia-analysis query is missing."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Set up thresholds and the per-facet keyword patterns.

        Args:
            config: Optional settings. Recognised keys are
                ``min_query_length`` (default 5) and
                ``ambiguity_threshold`` (default 0.5).
        """
        self.config = config or {}
        self.min_length = self.config.get('min_query_length', 5)
        self.threshold = self.config.get('ambiguity_threshold', 0.5)

        # Regex fragments whose presence in the query counts as the user
        # having mentioned that facet. Keys match AmbiguityType values.
        self.patterns = {
            'context_situation': [
                r'상황', r'맥락', r'상황 설명', r'언제', r'어디서', r'어떤 상황'
            ],
            'previous_utterance': [
                r'이전', r'전에', r'앞서', r'질문', r'말', r'발화'
            ],
            'child_age': [
                r'\d+세', r'나이', r'연령', r'몇 살'
            ],
            'vocabulary_level': [
                r'어휘', r'언어 수준', r'수준', r'능력'
            ],
            'emotional_state': [
                r'감정', r'기분', r'상태', r'느낌'
            ]
        }

    def detect(self, query: str, existing_info: Optional[Dict[str, Any]] = None) -> AmbiguityResult:
        """Report which facets are missing from ``query``.

        Args:
            query: The user's query. ``None`` is treated like an empty string.
            existing_info: Already-known facts keyed by facet name (the
                AmbiguityType values). A truthy entry marks that facet as
                known.

        Returns:
            An AmbiguityResult. Queries shorter than ``min_length`` get a
            fixed score of 0.9 with the GENERAL facet; otherwise the score is
            the fraction of pattern facets that are missing, capped at 1.0.
        """
        # Tolerate None so a missing query is reported as "too short"
        # instead of raising AttributeError.
        query_clean = (query or "").strip()
        existing_info = existing_info or {}

        if len(query_clean) < self.min_length:
            return AmbiguityResult(
                is_ambiguous=True,
                ambiguity_score=0.9,
                missing_facets=[AmbiguityType.GENERAL],
                reason="질문이 너무 짧습니다"
            )

        query_lower = query_clean.lower()
        missing_facets: List[AmbiguityType] = []

        for facet, patterns in self.patterns.items():
            # Facet already supplied by the caller.
            if existing_info.get(facet):
                continue
            # Facet mentioned somewhere in the query text.
            if any(re.search(pattern, query_lower) for pattern in patterns):
                continue
            try:
                missing_facets.append(AmbiguityType(facet))
            except ValueError:
                # Pattern keys that are not AmbiguityType values are ignored
                # on purpose (best effort); they cannot be reported as facets.
                continue

        # The situation facet is required unless the caller supplied it, even
        # when the query text happens to match a situation keyword.
        if not existing_info.get('context_situation'):
            if AmbiguityType.CONTEXT_SITUATION not in missing_facets:
                missing_facets.append(AmbiguityType.CONTEXT_SITUATION)

        facet_count = len(self.patterns)
        if not missing_facets:
            ambiguity_score = 0.0
        elif facet_count == 0:
            # No pattern facets configured but something is still missing:
            # avoid dividing by zero and report maximum ambiguity.
            ambiguity_score = 1.0
        else:
            ambiguity_score = min(1.0, len(missing_facets) / facet_count)

        is_ambiguous = ambiguity_score >= self.threshold

        if missing_facets:
            reason = f"부족한 정보: {', '.join([f.value for f in missing_facets])}"
        else:
            reason = "충분한 정보"

        return AmbiguityResult(
            is_ambiguous=is_ambiguous,
            ambiguity_score=ambiguity_score,
            missing_facets=missing_facets,
            reason=reason
        )
|
|
|
|
|
|
|
|
class CQGenerator:
    """Build clarifying questions from an ambiguity-detection result."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Store the config and set up question templates and preset options.

        Args:
            config: Optional settings; stored as-is on ``self.config``.
        """
        self.config = config or {}

        # Question text per facet.
        self.templates = {
            AmbiguityType.CONTEXT_SITUATION: "어떤 상황에서 이 말을 했나요? (예: 식사 시간, 놀이 시간, 이별 상황 등)",
            AmbiguityType.PREVIOUS_UTTERANCE: "아이에게 했던 질문이나 말이 있나요?",
            AmbiguityType.CHILD_AGE: "아이의 나이를 알려주세요.",
            AmbiguityType.VOCABULARY_LEVEL: "아이의 어휘 수준은 어느 정도인가요? (초급/중급/고급)",
            AmbiguityType.EMOTIONAL_STATE: "아이의 감정 상태는 어떤가요? (불안, 평온, 흥분 등)",
            AmbiguityType.GENERAL: "질문을 더 구체적으로 말씀해 주시겠어요?"
        }

        # Preset answer choices; facets absent here produce open-ended questions.
        self.options = {
            AmbiguityType.CONTEXT_SITUATION: [
                "식사 시간", "놀이 시간", "외출 준비", "수업 시간",
                "휴식 시간", "이별/분리 상황", "기타"
            ],
            AmbiguityType.VOCABULARY_LEVEL: ["초급", "중급", "고급"],
            AmbiguityType.EMOTIONAL_STATE: ["불안", "평온", "흥분", "화남", "슬픔", "기타"]
        }

    def generate(
        self,
        ambiguity_result: AmbiguityResult,
        original_query: str = "",
        max_questions: int = 2
    ) -> List[ClarifyingQuestion]:
        """Create up to ``max_questions`` questions for the missing facets.

        Args:
            ambiguity_result: Detection result to turn into questions.
            original_query: The user's query; forwarded to the per-facet
                builder.
            max_questions: Upper bound on the number of questions. Zero or a
                negative value yields no questions.

        Returns:
            Questions in the order of ``missing_facets``; empty when the
            result is not ambiguous.
        """
        # A non-positive limit means "ask nothing"; guarding here avoids the
        # surprising negative-slice behaviour of missing_facets[:-k].
        if not ambiguity_result.is_ambiguous or max_questions <= 0:
            return []

        questions = []
        for facet in ambiguity_result.missing_facets[:max_questions]:
            question = self._generate_question(facet, original_query)
            if question is not None:
                questions.append(question)

        return questions

    def _generate_question(self, facet: AmbiguityType, original_query: str = "") -> Optional[ClarifyingQuestion]:
        """Build the question for one facet, or None if it has no template.

        Args:
            facet: Facet to ask about.
            original_query: Accepted for interface compatibility; currently
                not used when building the question.
        """
        template = self.templates.get(facet)
        if not template:
            return None

        preset = self.options.get(facet)
        # Hand out a copy so callers mutating question.options cannot alter
        # the presets shared by every later question.
        options = list(preset) if preset is not None else None

        return ClarifyingQuestion(
            question_type=facet,
            question_text=template,
            options=options,
            is_open_ended=options is None
        )
|
|
|
|
|
|
|
|
class QueryRewriter:
    """Rewrite a query so that it carries the user's clarification answers."""

    def rewrite(
        self,
        original_query: str,
        clarifications: Dict[AmbiguityType, str]
    ) -> str:
        """Append the non-empty clarification answers to the query.

        Args:
            original_query: The query as first entered.
            clarifications: Answers keyed by facet. Only the answer text is
                used; empty or whitespace-only answers are skipped.

        Returns:
            ``original_query`` unchanged when there is nothing to add,
            otherwise the query followed by the answers in a parenthesised,
            comma-separated suffix.
        """
        if not clarifications:
            return original_query

        # Keys are irrelevant to the output, so iterate the values only.
        context_parts = [
            response.strip()
            for response in clarifications.values()
            if response and response.strip()
        ]

        if not context_parts:
            return original_query

        return f"{original_query} (상황: {', '.join(context_parts)})"
|
|
|
|
|
|
|
|
class ClarificationModule:
    """Facade combining detection, question generation and query rewriting.

    The instance is stateful: it counts clarification rounds and remembers
    the answers collected so far until ``reset`` is called.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create the sub-components and initialise the round state.

        Args:
            config: Optional settings shared with the detector and the
                generator. ``max_clarification_rounds`` (default 2) limits
                how many rounds of questions are asked.
        """
        self.config = config or {}
        self.detector = AmbiguityDetector(config)
        self.generator = CQGenerator(config)
        self.rewriter = QueryRewriter()

        self.max_rounds = self.config.get('max_clarification_rounds', 2)
        self.current_round = 0
        self.clarifications: Dict[AmbiguityType, str] = {}
        self.original_query_cache = ""

    def reset(self) -> None:
        """Clear the round counter, collected answers and cached query."""
        self.current_round = 0
        self.clarifications = {}
        self.original_query_cache = ""

    def should_clarify(
        self,
        query: str,
        existing_info: Optional[Dict[str, Any]] = None
    ) -> Tuple[bool, Optional[AmbiguityResult]]:
        """Decide whether another clarification round is needed.

        Args:
            query: Query to inspect.
            existing_info: Facts the caller already knows, keyed by facet
                name. The caller's dict is not modified.

        Returns:
            ``(False, None)`` once the round limit is reached; otherwise the
            detector's verdict together with its full result.
        """
        if self.current_round >= self.max_rounds:
            return False, None

        known_info = dict(existing_info or {})
        # Feed answers gathered in earlier rounds back to the detector so an
        # already-answered facet is not requested again. Values supplied by
        # the caller take precedence over stored answers.
        for facet, answer in self.clarifications.items():
            if isinstance(answer, str):
                answer = answer.strip()
            if not answer:
                continue
            key = getattr(facet, 'value', facet)
            if not known_info.get(key):
                known_info[key] = answer

        ambiguity_result = self.detector.detect(query, known_info)

        return ambiguity_result.is_ambiguous, ambiguity_result

    def get_clarifying_questions(
        self,
        ambiguity_result: AmbiguityResult,
        original_query: str = ""
    ) -> List[ClarifyingQuestion]:
        """Start a new round and return its questions.

        Args:
            ambiguity_result: Result previously obtained from detection.
            original_query: When non-empty, replaces the cached query.

        Returns:
            The questions produced by the generator for this round.
        """
        # Counted unconditionally so that repeated calls always progress
        # towards the round limit, even if no question is produced.
        self.current_round += 1
        if original_query:
            self.original_query_cache = original_query
        return self.generator.generate(ambiguity_result, self.original_query_cache)

    def process_response(
        self,
        original_query: str,
        question: ClarifyingQuestion,
        response: str
    ) -> str:
        """Record the user's answer and return the rewritten query.

        Args:
            original_query: Query to which the answers are appended.
            question: The question being answered; its ``question_type`` is
                the key under which the answer is stored.
            response: The user's answer text.

        Returns:
            The query rewritten with every answer collected so far.
        """
        self.clarifications[question.question_type] = response
        return self.rewriter.rewrite(original_query, self.clarifications)
|
|
|
|
|
|