File size: 7,577 Bytes
3998131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
"""
Conversation Context Analyzer Module
Analyzes conversation context to determine message independence and relevance
"""

import logging
from typing import List, Dict, Optional
from .llm_client import MistralClient

# Module-level logger named after this module; used by every method of the analyzer below.
logger = logging.getLogger(__name__)


class ConversationContextAnalyzer:
    """
    Analyzes conversation context to determine:
    1. Whether a message is legal-related or casual (greetings, thanks, etc.)
    2. Whether a message is independent or dependent on previous context
    3. Generates summaries for dependent conversations

    Every public operation delegates to ``self.llm_client.generate_response``
    and falls back to a conservative default when that call (or the handling
    of its result) raises.
    """

    # The system prompts are constant, so they live at class level; this keeps
    # the methods below focused on the classification / summarization logic.
    _NON_LEGAL_SYSTEM_PROMPT = """You are a classifier that determines if a message is related to legal matters or is casual conversation.

Casual conversation includes:
- Greetings (hi, hello, hey, good morning, etc.)
- Thanks/gratitude (thank you, thanks, appreciate it, etc.)
- Goodbye (bye, see you, goodbye, etc.)
- Small talk (how are you, what's up, etc.)
- Acknowledgments (ok, okay, yes, no, sure, etc.)

Legal-related includes:
- Questions about laws, regulations, rights
- Legal issues, disputes, cases
- Questions about legal procedures
- Anything requiring legal information

Respond with ONLY one word: "LEGAL" or "NON_LEGAL"
"""

    _INDEPENDENCE_SYSTEM_PROMPT = """You are an analyzer that determines if a message is independent or dependent on previous conversation.

INDEPENDENT messages:
- Introduce a completely new topic
- Can be understood without previous context
- Are self-contained questions

DEPENDENT messages:
- Reference previous discussion (pronouns like "he", "she", "it", "they", "this", "that")
- Continue or expand on previous topic
- Ask follow-up questions
- Require previous context to be understood

Respond with ONLY one word: "INDEPENDENT" or "DEPENDENT"
"""

    _SUMMARY_SYSTEM_PROMPT = """You are a legal assistant that creates concise, clear queries for a legal information retrieval system.

Your task: Combine the conversation history with the current message to create ONE clear, self-contained legal query.

Requirements:
- Include all relevant context from the conversation
- Replace pronouns with actual entities (e.g., "he" -> "my brother")
- Keep it concise (1-3 sentences)
- Make it specific and searchable
- Focus on the legal aspect

Example:
Conversation:
Human: I had a fight with my brother over property
Assistant: [discusses property dispute laws]
Human: He is making fake allegations

Output: "My brother is making fake allegations against me in a property dispute. What are my legal rights and how should I respond?"
"""

    def __init__(self, model: str = "mistral-small-latest"):
        """
        Initialize the context analyzer

        Args:
            model: Mistral model to use for analysis
        """
        self.llm_client = MistralClient(model=model)
        logger.info("ConversationContextAnalyzer initialized with model: %s", model)

    def is_non_legal_query(self, message: str) -> bool:
        """
        Determine if a message is non-legal (greetings, thanks, casual conversation)

        Args:
            message: The user's message

        Returns:
            True if non-legal, False if legal-related. Also False when the
            classification fails for any reason, so that a failure never
            causes a potentially legal question to be treated as small talk.
        """
        try:
            prompt = f'Message: "{message}"\n\nClassify this message:'

            response = self.llm_client.generate_response(
                prompt=prompt,
                system_prompt=self._NON_LEGAL_SYSTEM_PROMPT,
                temperature=0.1,  # Low temperature for consistent classification
            )

            verdict = response.strip().upper()
            # Accept both spellings; a plain "LEGAL" answer matches neither.
            is_non_legal = "NON_LEGAL" in verdict or "NON-LEGAL" in verdict

            logger.info("Non-legal query check: '%s...' -> %s", message[:50], is_non_legal)
            return is_non_legal

        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception("Error in is_non_legal_query: %s", e)
            # On error, assume it's legal-related to be safe
            return False

    def is_independent_query(self, current_msg: str, context: List[Dict[str, str]]) -> bool:
        """
        Determine if the current message is independent of the conversation history

        Args:
            current_msg: The current user message
            context: List of previous messages [{"role": "user"/"assistant", "content": "..."}]

        Returns:
            True if independent, False if dependent on previous context.
            Always True when there is no context (None or empty) and when
            the classification fails.
        """
        # Nothing to depend on: skip the LLM call entirely.
        if not context:
            return True

        try:
            conversation_text = self._format_context(context)

            prompt = f"""Previous conversation:
{conversation_text}

Current message: "{current_msg}"

Is the current message independent or dependent on the conversation?"""

            response = self.llm_client.generate_response(
                prompt=prompt,
                system_prompt=self._INDEPENDENCE_SYSTEM_PROMPT,
                temperature=0.1,  # Low temperature for consistent classification
            )

            verdict = response.strip().upper()
            # "DEPENDENT" does not contain "INDEPENDENT", so this substring
            # test distinguishes the two expected answers.
            is_independent = "INDEPENDENT" in verdict

            logger.info(
                "Independence check: '%s...' -> %s",
                current_msg[:50],
                "independent" if is_independent else "dependent",
            )
            return is_independent

        except Exception as e:
            logger.exception("Error in is_independent_query: %s", e)
            # On error, assume independent to avoid incorrect context merging
            return True

    def summarize_conversation(self, current_msg: str, context: List[Dict[str, str]]) -> str:
        """
        Create a concise summary combining conversation context and current message
        This summary will be sent to the RAG pipeline

        Args:
            current_msg: The current user message
            context: List of previous messages

        Returns:
            A concise query suitable for RAG retrieval. Falls back to
            ``current_msg`` unchanged when the LLM call fails or yields a
            blank response, so the caller never receives an empty query.
        """
        try:
            conversation_text = self._format_context(context)

            prompt = f"""Conversation history:
{conversation_text}

Current message: "{current_msg}"

Create a single, clear legal query:"""

            response = self.llm_client.generate_response(
                prompt=prompt,
                system_prompt=self._SUMMARY_SYSTEM_PROMPT,
                temperature=0.3,  # Slightly higher for natural query generation
            )

            summarized_query = response.strip()
            if not summarized_query:
                # A blank summary would send an empty query downstream; the
                # raw message is a better retrieval key than nothing.
                logger.warning(
                    "Summarization returned an empty query; falling back to current message"
                )
                return current_msg

            logger.info("Summarized query: %s...", summarized_query[:100])
            return summarized_query

        except Exception as e:
            logger.exception("Error in summarize_conversation: %s", e)
            # Fallback: return the current message as-is
            return current_msg

    def _format_context(self, context: List[Dict[str, str]], max_messages: int = 10) -> str:
        """
        Format conversation context for LLM consumption

        Only the ``max_messages`` most recent entries are considered. Entries
        with role "user" are rendered as ``Human: <content>``, entries with
        role "assistant" as ``Chatbot: <content>``; entries with any other
        role are skipped (but still count toward ``max_messages``).

        Args:
            context: List of message dictionaries
            max_messages: Maximum number of messages to include; zero or a
                negative value includes no messages

        Returns:
            Formatted conversation string, one line per rendered message
            (empty string when nothing is rendered)
        """
        # Guard non-positive limits explicitly: context[-0:] is the WHOLE
        # list, and a negative limit would slice from the front instead.
        if max_messages <= 0:
            return ""

        # Slicing past the start of a list is safe, so no length check is needed.
        recent_context = context[-max_messages:]

        # NOTE(review): the summarization prompt's example labels bot turns
        # "Assistant:" while this emits "Chatbot:" -- confirm which is intended.
        speaker_labels = {"user": "Human", "assistant": "Chatbot"}

        formatted_lines = []
        for msg in recent_context:
            label = speaker_labels.get(msg.get("role", ""))
            if label is not None:
                formatted_lines.append(f"{label}: {msg.get('content', '')}")

        return "\n".join(formatted_lines)