File size: 10,369 Bytes
1d95600
 
 
 
 
 
 
 
 
 
 
 
 
a2438f7
 
 
1d95600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2438f7
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import openai
import os
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from app.models import ChatMessage, ChatSession
from app.rag_system import RAGSystem

logger = logging.getLogger(__name__)

class RAGChatbot:
    """RAG-powered chatbot with memory of PDF and lecture content"""
    
    def __init__(self, openai_api_key: str):
        """Set up the OpenAI client, the RAG backend and an empty session table.

        Args:
            openai_api_key: Key handed to both ``openai.OpenAI`` and
                ``RAGSystem``.
        """
        api_key = openai_api_key

        # Plain in-memory state: sessions are looked up by their session id.
        self.sessions: Dict[str, ChatSession] = {}
        # Token limit for context
        self.max_context_length = 8000

        # External collaborators, both configured with the same key.
        self.client = openai.OpenAI(api_key=api_key)
        self.rag_system = RAGSystem(openai_api_key=api_key)
    
    def create_session(self, session_id: str, pdf_content: str = None, lecture_content: str = None) -> bool:
        """Register a chat session and index whatever content came with it.

        Args:
            session_id: Key under which the session is stored in
                ``self.sessions``; an existing entry with the same key is
                replaced.
            pdf_content: Optional PDF text. Indexed via the RAG system only
                when truthy (``None`` and ``""`` are skipped).
            lecture_content: Optional lecture text, handled the same way.

        Returns:
            True when the session was stored and all supplied content was
            indexed without an exception; False when anything raised (the
            error is logged).
        """
        try:
            # NOTE: the session is stored before indexing, so it remains in
            # self.sessions even if an indexing call below raises and this
            # method reports False.
            self.sessions[session_id] = ChatSession(
                session_id=session_id,
                pdf_content=pdf_content,
                lecture_content=lecture_content,
            )

            # Only non-empty content is pushed to the RAG system.
            if pdf_content:
                self.rag_system.add_pdf_content(session_id, pdf_content)
            if lecture_content:
                self.rag_system.add_lecture_content(session_id, lecture_content)

            logger.info(f"Created chat session {session_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to create session {session_id}: {str(e)}")
            return False
    
    def add_message(self, session_id: str, role: str, content: str) -> bool:
        """Add a message to the session history"""
        try:
            if session_id not in self.sessions:
                return False
            
            message = ChatMessage(role=role, content=content)
            self.sessions[session_id].messages.append(message)
            return True
            
        except Exception as e:
            logger.error(f"Failed to add message to session {session_id}: {str(e)}")
            return False
    
    def get_response(self, session_id: str, user_message: str) -> Dict[str, Any]:
        """Generate a response to user message using RAG"""
        try:
            if session_id not in self.sessions:
                return {
                    'success': False,
                    'error': 'Session not found',
                    'response': ''
                }
            
            session = self.sessions[session_id]
            
            # Add user message to history
            self.add_message(session_id, "user", user_message)
            
            # Retrieve relevant content
            retrieval_result = self.rag_system.retrieve_relevant_content(
                session_id, user_message, n_results=5
            )
            
            if not retrieval_result['success']:
                logger.warning(f"Content retrieval failed for session {session_id}")
                relevant_content = []
            else:
                relevant_content = retrieval_result['results']
            
            # Generate response
            response = self._generate_response(session, user_message, relevant_content)
            
            # Add assistant response to history
            self.add_message(session_id, "assistant", response)
            
            return {
                'success': True,
                'response': response,
                'sources_used': len(relevant_content),
                'session_id': session_id
            }
            
        except Exception as e:
            logger.error(f"Failed to generate response for session {session_id}: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'response': 'I apologize, but I encountered an error while processing your message. Please try again.'
            }
    
    def _generate_response(self, session: ChatSession, user_message: str, relevant_content: List[Dict]) -> str:
        """Generate response using OpenAI with RAG context"""
        try:
            # Build context from relevant content
            context_parts = []
            
            if relevant_content:
                context_parts.append("Relevant information from your documents:")
                for i, item in enumerate(relevant_content[:3], 1):  # Limit to top 3 results
                    source = "PDF" if item['source'] == 'pdf' else "Lecture"
                    context_parts.append(f"{i}. [{source}] {item['content'][:500]}...")
                context_parts.append("")
            
            # Build conversation history (limited to recent messages)
            conversation_history = []
            recent_messages = session.messages[-6:]  # Last 6 messages for context
            
            for msg in recent_messages[:-1]:  # Exclude the current user message
                conversation_history.append(f"{msg.role.title()}: {msg.content}")
            
            # Create system prompt
            system_prompt = """You are a helpful AI assistant that can answer questions about uploaded PDF documents and generated lectures. 

Key guidelines:
1. Use the provided relevant information to answer questions accurately
2. If you don't have enough information in the context, say so clearly
3. Maintain a conversational and educational tone
4. Reference the source (PDF or Lecture) when appropriate
5. Be concise but thorough in your explanations
6. If asked about something not in the documents, explain that your knowledge is limited to the uploaded content

Always strive to be helpful while being honest about the limitations of your knowledge."""
            
            # Build the full prompt
            messages = [{"role": "system", "content": system_prompt}]
            
            # Add context if available
            if context_parts:
                context_message = "\n".join(context_parts)
                messages.append({"role": "system", "content": context_message})
            
            # Add conversation history
            if conversation_history:
                history_message = "Previous conversation:\n" + "\n".join(conversation_history)
                messages.append({"role": "system", "content": history_message})
            
            # Add current user message
            messages.append({"role": "user", "content": user_message})
            
            # Generate response
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.7,
                max_tokens=1000
            )
            
            return response.choices[0].message.content
            
        except Exception as e:
            logger.error(f"Response generation failed: {str(e)}")
            return "I apologize, but I'm having trouble generating a response right now. Please try rephrasing your question."
    
    def get_session_history(self, session_id: str) -> List[Dict[str, Any]]:
        """Get chat history for a session"""
        try:
            if session_id not in self.sessions:
                return []
            
            session = self.sessions[session_id]
            return [
                {
                    'role': msg.role,
                    'content': msg.content,
                    'timestamp': msg.timestamp.isoformat()
                }
                for msg in session.messages
            ]
            
        except Exception as e:
            logger.error(f"Failed to get session history {session_id}: {str(e)}")
            return []
    
    def clear_session(self, session_id: str) -> bool:
        """Clear a chat session and its data"""
        try:
            # Clear from RAG system
            self.rag_system.clear_session_data(session_id)
            
            # Remove from local sessions
            if session_id in self.sessions:
                del self.sessions[session_id]
            
            logger.info(f"Cleared session {session_id}")
            return True
            
        except Exception as e:
            logger.error(f"Failed to clear session {session_id}: {str(e)}")
            return False
    
    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
        """Get statistics about a session"""
        try:
            if session_id not in self.sessions:
                return {'exists': False}
            
            session = self.sessions[session_id]
            rag_stats = self.rag_system.get_session_stats(session_id)
            
            return {
                'exists': True,
                'message_count': len(session.messages),
                'created_at': session.created_at.isoformat(),
                'has_pdf': session.pdf_content is not None,
                'has_lecture': session.lecture_content is not None,
                **rag_stats
            }
            
        except Exception as e:
            logger.error(f"Failed to get session stats {session_id}: {str(e)}")
            return {'exists': False, 'error': str(e)}
    
    def update_session_content(self, session_id: str, pdf_content: str = None, lecture_content: str = None) -> bool:
        """Update session with new content"""
        try:
            if session_id not in self.sessions:
                return False
            
            session = self.sessions[session_id]
            
            # Update PDF content
            if pdf_content:
                session.pdf_content = pdf_content
                self.rag_system.add_pdf_content(session_id, pdf_content)
            
            # Update lecture content
            if lecture_content:
                session.lecture_content = lecture_content
                self.rag_system.add_lecture_content(session_id, lecture_content)
            
            logger.info(f"Updated content for session {session_id}")
            return True
            
        except Exception as e:
            logger.error(f"Failed to update session content {session_id}: {str(e)}")
            return False
        
    def set_api_key(self, api_key: str):
        """Swap in a new OpenAI client built from ``api_key``.

        Args:
            api_key: Key used for all subsequent calls made through
                ``self.client``.
        """
        # NOTE(review): only the chat client is rebuilt here. self.rag_system
        # received its key in __init__ and is not touched by this method;
        # confirm whether it should be refreshed as well.
        refreshed_client = openai.OpenAI(api_key=api_key)
        self.client = refreshed_client