|
|
""" |
|
|
Conversation Context Analyzer Module |
|
|
Analyzes conversation context to determine message independence and relevance |
|
|
""" |
|
|
|
|
|
import logging |
|
|
from typing import List, Dict, Optional |
|
|
from .llm_client import MistralClient |
|
|
|
|
|
# Module-level logger, named after this module so log output can be filtered per module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class ConversationContextAnalyzer:
    """
    Analyzes conversation context with the help of an LLM.

    Responsibilities:
    1. Decide whether a message is legal-related or casual (greetings, thanks, etc.)
    2. Decide whether a message is independent of, or dependent on, previous context
    3. Generate a self-contained summary query for dependent conversations

    All LLM-backed methods are best-effort: on any error they log the failure
    and return a fallback value instead of raising.
    """

    # System prompt for the legal / casual classifier.
    _NON_LEGAL_SYSTEM_PROMPT = (
        "You are a classifier that determines if a message is related to legal matters or is casual conversation.\n"
        "\n"
        "Casual conversation includes:\n"
        "- Greetings (hi, hello, hey, good morning, etc.)\n"
        "- Thanks/gratitude (thank you, thanks, appreciate it, etc.)\n"
        "- Goodbye (bye, see you, goodbye, etc.)\n"
        "- Small talk (how are you, what's up, etc.)\n"
        "- Acknowledgments (ok, okay, yes, no, sure, etc.)\n"
        "\n"
        "Legal-related includes:\n"
        "- Questions about laws, regulations, rights\n"
        "- Legal issues, disputes, cases\n"
        "- Questions about legal procedures\n"
        "- Anything requiring legal information\n"
        "\n"
        'Respond with ONLY one word: "LEGAL" or "NON_LEGAL"\n'
    )

    # System prompt for the independent / dependent classifier.
    _INDEPENDENCE_SYSTEM_PROMPT = (
        "You are an analyzer that determines if a message is independent or dependent on previous conversation.\n"
        "\n"
        "INDEPENDENT messages:\n"
        "- Introduce a completely new topic\n"
        "- Can be understood without previous context\n"
        "- Are self-contained questions\n"
        "\n"
        "DEPENDENT messages:\n"
        '- Reference previous discussion (pronouns like "he", "she", "it", "they", "this", "that")\n'
        "- Continue or expand on previous topic\n"
        "- Ask follow-up questions\n"
        "- Require previous context to be understood\n"
        "\n"
        'Respond with ONLY one word: "INDEPENDENT" or "DEPENDENT"\n'
    )

    # System prompt for rewriting a dependent message into a standalone query.
    _SUMMARY_SYSTEM_PROMPT = (
        "You are a legal assistant that creates concise, clear queries for a legal information retrieval system.\n"
        "\n"
        "Your task: Combine the conversation history with the current message to create ONE clear, self-contained legal query.\n"
        "\n"
        "Requirements:\n"
        "- Include all relevant context from the conversation\n"
        '- Replace pronouns with actual entities (e.g., "he" -> "my brother")\n'
        "- Keep it concise (1-3 sentences)\n"
        "- Make it specific and searchable\n"
        "- Focus on the legal aspect\n"
        "\n"
        "Example:\n"
        "Conversation:\n"
        "Human: I had a fight with my brother over property\n"
        "Assistant: [discusses property dispute laws]\n"
        "Human: He is making fake allegations\n"
        "\n"
        'Output: "My brother is making fake allegations against me in a property dispute. '
        'What are my legal rights and how should I respond?"\n'
    )

    # Spellings of the "non-legal" verdict accepted from the classifier.
    _NON_LEGAL_MARKERS = ("NON_LEGAL", "NON-LEGAL", "NON LEGAL")

    # Speaker labels used when rendering history; other roles are skipped.
    _ROLE_LABELS = {"user": "Human", "assistant": "Chatbot"}

    def __init__(self, model: str = "mistral-small-latest"):
        """
        Initialize the context analyzer.

        Args:
            model: Mistral model name passed to ``MistralClient``
        """
        self.llm_client = MistralClient(model=model)
        logger.info("ConversationContextAnalyzer initialized with model: %s", model)

    def is_non_legal_query(self, message: str) -> bool:
        """
        Determine if a message is non-legal (greetings, thanks, casual conversation).

        Args:
            message: The user's message

        Returns:
            True if non-legal, False if legal-related. Also returns False when
            the classification fails for any reason.
        """
        try:
            response = self.llm_client.generate_response(
                prompt=f'Message: "{message}"\n\nClassify this message:',
                system_prompt=self._NON_LEGAL_SYSTEM_PROMPT,
                temperature=0.1,
            )

            # Substring match tolerates extra words or punctuation around the verdict.
            verdict = response.strip().upper()
            is_non_legal = any(marker in verdict for marker in self._NON_LEGAL_MARKERS)

            logger.info("Non-legal query check: '%s...' -> %s", message[:50], is_non_legal)
            return is_non_legal

        except Exception as e:
            # Best-effort: a failed classification is reported as "legal".
            logger.exception("Error in is_non_legal_query: %s", e)
            return False

    def is_independent_query(self, current_msg: str, context: List[Dict[str, str]]) -> bool:
        """
        Determine if the current message is independent of the conversation history.

        Args:
            current_msg: The current user message
            context: List of previous messages
                [{"role": "user"/"assistant", "content": "..."}]

        Returns:
            True if independent, False if dependent on previous context.
            Returns True without calling the LLM when there is no history,
            and also when the check fails for any reason.
        """
        # Nothing to depend on.
        if not context:
            return True

        try:
            conversation_text = self._format_context(context)
            prompt = (
                "Previous conversation:\n"
                f"{conversation_text}\n"
                "\n"
                f'Current message: "{current_msg}"\n'
                "\n"
                "Is the current message independent or dependent on the conversation?"
            )

            response = self.llm_client.generate_response(
                prompt=prompt,
                system_prompt=self._INDEPENDENCE_SYSTEM_PROMPT,
                temperature=0.1,
            )

            # "DEPENDENT" alone does not contain "INDEPENDENT", so this
            # substring test separates the two expected one-word answers.
            verdict = response.strip().upper()
            is_independent = "INDEPENDENT" in verdict

            logger.info(
                "Independence check: '%s...' -> %s",
                current_msg[:50],
                "independent" if is_independent else "dependent",
            )
            return is_independent

        except Exception as e:
            # Best-effort: a failed check is reported as "independent".
            logger.exception("Error in is_independent_query: %s", e)
            return True

    def summarize_conversation(self, current_msg: str, context: List[Dict[str, str]]) -> str:
        """
        Create a concise summary combining conversation context and current message.

        This summary will be sent to the RAG pipeline.

        Args:
            current_msg: The current user message
            context: List of previous messages

        Returns:
            A concise query suitable for RAG retrieval. Falls back to
            ``current_msg`` when there is no history, when the LLM returns a
            blank reply, or when the call fails.
        """
        # Without history there is nothing to merge; skip the LLM call.
        if not context:
            return current_msg

        try:
            conversation_text = self._format_context(context)
            prompt = (
                "Conversation history:\n"
                f"{conversation_text}\n"
                "\n"
                f'Current message: "{current_msg}"\n'
                "\n"
                "Create a single, clear legal query:"
            )

            response = self.llm_client.generate_response(
                prompt=prompt,
                system_prompt=self._SUMMARY_SYSTEM_PROMPT,
                temperature=0.3,
            )

            summarized_query = response.strip()
            if not summarized_query:
                # A blank query is useless for retrieval; keep the user's own words.
                logger.warning("Empty summary returned by LLM; using the original message")
                return current_msg

            logger.info("Summarized query: %s...", summarized_query[:100])
            return summarized_query

        except Exception as e:
            logger.exception("Error in summarize_conversation: %s", e)
            return current_msg

    def _format_context(self, context: List[Dict[str, str]], max_messages: int = 10) -> str:
        """
        Format conversation context for LLM consumption.

        Args:
            context: List of message dictionaries with "role" and "content" keys
            max_messages: Maximum number of most-recent messages to include;
                zero or a negative value yields an empty string

        Returns:
            Formatted conversation string, one "Human: ..." / "Chatbot: ..."
            line per message. Messages with any other role are skipped.
        """
        # Guard explicitly: context[-0:] would return the WHOLE list, and a
        # negative limit would drop messages from the front instead.
        if not context or max_messages <= 0:
            return ""

        formatted_lines = []
        # Slicing past the start of a short list is safe and returns it whole.
        for msg in context[-max_messages:]:
            label = self._ROLE_LABELS.get(msg.get("role", ""))
            if label is not None:
                formatted_lines.append(f"{label}: {msg.get('content', '')}")

        return "\n".join(formatted_lines)
|
|
|