Архипов Дмитрий committed on
Commit
4b5b231
·
1 Parent(s): eb39d7b
.streamlit/config.toml DELETED
@@ -1,23 +0,0 @@
1
- [theme]
2
- base = "light"
3
- primaryColor = "#1f77b4"
4
- backgroundColor = "#ffffff"
5
- secondaryBackgroundColor = "#f0f2f6"
6
- textColor = "#262730"
7
- font = "sans serif"
8
-
9
- [server]
10
- # Important for HuggingFace Spaces
11
- enableCORS = false
12
- enableXsrfProtection = true
13
- maxUploadSize = 200
14
- headless = true
15
-
16
- [browser]
17
- # Disable usage-statistics collection (telemetry)
18
- gatherUsageStats = false
19
-
20
- [runner]
21
- # Optimize for production environments like HF Spaces
22
- magicEnabled = false
23
- fastReruns = true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,19 +0,0 @@
1
- ---
2
- title: News TG Channel RAG
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- ---
13
-
14
- # Welcome to Streamlit!
15
-
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit Frontend для RAG вопросно-ответной системы
3
+ Чат-интерфейс с поддержкой нескольких диалогов
4
+ """
5
+ import streamlit as st
6
+ from datetime import datetime, timedelta
7
+ from typing import List, Dict, Optional
8
+ import uuid
9
+
10
+ from src import RAG
11
+ from src.db_utils.history_utils import (
12
+ init_history_table,
13
+ log_query,
14
+ get_all_history,
15
+ get_history_by_dialogue,
16
+ search_history,
17
+ get_history_stats,
18
+ delete_history,
19
+ get_recent_dialogues
20
+ )
21
+
22
+
23
+ # --- Инициализация RAG и БД ---
24
+ @st.cache_resource(show_spinner=False)
25
+ def get_rag():
26
+ """Initialize RAG once and cache it"""
27
+ return RAG(
28
+ embed_model_name = "Qwen/Qwen3-Embedding-0.6B",
29
+ embed_index_name = "recursive_Qwen3-Embedding-0.6B"
30
+ )
31
+
32
+
33
+ @st.cache_resource(show_spinner=False)
34
+ def init_db():
35
+ """Initialize database once and cache it"""
36
+ try:
37
+ init_history_table()
38
+ return True
39
+ except Exception as e:
40
+ st.error(f"⚠️ Не удалось инициализировать таблицу истории: {e}")
41
+ return False
42
+
43
+
44
+ # --- Session State Management ---
45
+ def init_session_state():
46
+ """Initialize session state with caching"""
47
+ if "current_dialogue_id" not in st.session_state:
48
+ st.session_state.current_dialogue_id = None
49
+ if "chat_list" not in st.session_state:
50
+ st.session_state.chat_list = []
51
+ if "current_chat_messages" not in st.session_state:
52
+ st.session_state.current_chat_messages = []
53
+ if "chat_list_loaded" not in st.session_state:
54
+ st.session_state.chat_list_loaded = False
55
+
56
+
57
+ def generate_dialogue_id() -> str:
58
+ """Generate unique dialogue ID"""
59
+ return f"chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
60
+
61
+
62
+ def get_chat_display_name(dialogue_id: str, first_query: str = None) -> str:
63
+ """Get display name for chat: the first query truncated to 40 chars, or a default title"""
64
+ if first_query:
65
+ # Use first 40 chars of first query as name
66
+ name = first_query[:40] + "..." if len(first_query) > 40 else first_query
67
+ return name
68
+
69
+ return "Новый диалог"
70
+
71
+
72
+ # --- Chat Management Functions ---
73
+
74
+ def load_chats_list():
75
+ """Load and cache chats list from DB"""
76
+ try:
77
+ st.session_state.chat_list = get_recent_dialogues(limit=50)
78
+ st.session_state.chat_list_loaded = True
79
+ except Exception as e:
80
+ st.error(f"❌ Ошибка при загрузке чатов: {e}")
81
+ st.session_state.chat_list = []
82
+
83
+
84
+ def create_new_chat():
85
+ """Create a new chat"""
86
+ new_id = generate_dialogue_id()
87
+ st.session_state.current_dialogue_id = new_id
88
+ st.session_state.current_chat_messages = []
89
+ st.session_state.needs_rerun = True
90
+ return new_id
91
+
92
+
93
+ def switch_to_chat(dialogue_id: str):
94
+ """Switch to an existing chat and load its messages"""
95
+ st.session_state.current_dialogue_id = dialogue_id
96
+ load_current_chat_messages()
97
+ st.session_state.needs_rerun = True
98
+
99
+
100
+ def load_current_chat_messages():
101
+ """Load messages for current chat from DB and cache"""
102
+ if not st.session_state.current_dialogue_id:
103
+ st.session_state.current_chat_messages = []
104
+ return
105
+
106
+ try:
107
+ st.session_state.current_chat_messages = get_history_by_dialogue(
108
+ st.session_state.current_dialogue_id
109
+ )
110
+ except Exception as e:
111
+ st.error(f"❌ Ошибка при загрузке сообщений: {e}")
112
+ st.session_state.current_chat_messages = []
113
+
114
+
115
+ def get_current_chat_messages() -> List[Dict]:
116
+ """Get cached messages for current chat"""
117
+ return st.session_state.current_chat_messages
118
+
119
+
120
+ def send_message(query: str) -> Optional[Dict]:
121
+ """Send a message in current chat and update cache"""
122
+ try:
123
+ if not st.session_state.current_dialogue_id:
124
+ create_new_chat()
125
+
126
+ # Get RAG and invoke with cached history
127
+ rag = get_rag()
128
+
129
+ # Use cached messages
130
+ current_history = get_current_chat_messages()
131
+
132
+ # Pass history to RAG (it will use last N messages internally for enrichment)
133
+ result = rag.invoke(query, history=current_history)
134
+
135
+ # Log to history DB
136
+ query_id = log_query(
137
+ query=query,
138
+ answer=result.get("answer", ""),
139
+ reason=result.get("reason", ""),
140
+ dialogue_id=st.session_state.current_dialogue_id
141
+ )
142
+
143
+ result["query_id"] = query_id
144
+
145
+ # Update only current messages, not all chats
146
+ load_current_chat_messages()
147
+
148
+ # Mark that we need to refresh chat list (but don't do it immediately)
149
+ st.session_state.chat_list_loaded = False
150
+ st.session_state.needs_rerun = True
151
+
152
+ return result
153
+ except Exception as e:
154
+ st.error(f"❌ Ошибка при отправке сообщения: {e}")
155
+ return None
156
+
157
+
158
+ def delete_chat(dialogue_id: str) -> bool:
159
+ """Delete a chat from DB and update cache"""
160
+ try:
161
+ delete_history(dialogue_id=dialogue_id)
162
+
163
+ # If deleted current chat, clear selection
164
+ if st.session_state.current_dialogue_id == dialogue_id:
165
+ st.session_state.current_dialogue_id = None
166
+ st.session_state.current_chat_messages = []
167
+
168
+ # Mark that we need to reload chat list
169
+ st.session_state.chat_list_loaded = False
170
+ st.session_state.needs_rerun = True
171
+
172
+ return True
173
+ except Exception as e:
174
+ st.error(f"❌ Ошибка при удалении чата: {e}")
175
+ return False
176
+
177
+
178
+
179
+
180
+ # --- Page: Chat Interface ---
181
+ def page_chat():
182
+ """Main chat interface page"""
183
+
184
+ # Custom CSS to fix chat input at the bottom + keyboard shortcuts
185
+ st.markdown("""
186
+ <style>
187
+ /* Fix chat input at the bottom of main content area */
188
+ section[data-testid="stSidebar"] ~ div .stChatInput {
189
+ position: fixed;
190
+ bottom: 0;
191
+ background: white;
192
+ padding: 1rem;
193
+ z-index: 999;
194
+ border-top: 1px solid #e6e6e6;
195
+ margin-left: 0;
196
+ }
197
+
198
+ /* Add padding to main content to prevent overlap with fixed input */
199
+ .main .block-container {
200
+ padding-bottom: 100px;
201
+ }
202
+
203
+ /* Dark mode support */
204
+ [data-testid="stAppViewContainer"][data-theme="dark"] section[data-testid="stSidebar"] ~ div .stChatInput {
205
+ background: rgb(14, 17, 23);
206
+ border-top: 1px solid #333;
207
+ }
208
+
209
+ /* Adjust width to account for sidebar */
210
+ @media (min-width: 768px) {
211
+ section[data-testid="stSidebar"] ~ div .stChatInput {
212
+ left: var(--sidebar-width, 21rem);
213
+ right: 0;
214
+ }
215
+ }
216
+
217
+ /* When sidebar is collapsed */
218
+ section[data-testid="stSidebar"][aria-expanded="false"] ~ div .stChatInput {
219
+ left: 0;
220
+ }
221
+ </style>
222
+
223
+ <script>
224
+ // Add keyboard shortcuts support
225
+ document.addEventListener('DOMContentLoaded', function() {
226
+ // Find chat input field
227
+ const observer = new MutationObserver(function(mutations) {
228
+ const chatInput = document.querySelector('textarea[data-testid="stChatInput"]');
229
+ if (chatInput && !chatInput.hasAttribute('data-shortcut-attached')) {
230
+ chatInput.setAttribute('data-shortcut-attached', 'true');
231
+
232
+ // Add keyboard event listener
233
+ chatInput.addEventListener('keydown', function(e) {
234
+ // Enter (without Shift) - send message
235
+ if (e.key === 'Enter' && !e.shiftKey) {
236
+ e.preventDefault();
237
+ // Trigger the send button
238
+ const sendButton = document.querySelector('button[kind="primary"]');
239
+ if (sendButton) {
240
+ sendButton.click();
241
+ }
242
+ }
243
+ // Ctrl+Enter or Cmd+Enter - send message (alternative)
244
+ else if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
245
+ e.preventDefault();
246
+ const sendButton = document.querySelector('button[kind="primary"]');
247
+ if (sendButton) {
248
+ sendButton.click();
249
+ }
250
+ }
251
+ // Shift+Enter - new line (default behavior)
252
+ });
253
+ }
254
+ });
255
+
256
+ observer.observe(document.body, {
257
+ childList: true,
258
+ subtree: true
259
+ });
260
+ });
261
+ </script>
262
+ """, unsafe_allow_html=True)
263
+
264
+ # Check if we have a current chat
265
+ if not st.session_state.current_dialogue_id:
266
+ # Show welcome screen
267
+ st.title("💬 Чат с RAG системой")
268
+ st.markdown("---")
269
+
270
+ col1, col2, col3 = st.columns([1, 2, 1])
271
+ with col2:
272
+ st.info("👋 Добро пожаловать! Создайте новый чат или выберите существующий из списка слева.")
273
+
274
+ if st.button("🆕 Начать новый чат", type="primary", use_container_width=True):
275
+ create_new_chat()
276
+
277
+ return
278
+
279
+ # Get cached messages
280
+ current_messages = get_current_chat_messages()
281
+
282
+ # Display chat header
283
+ if current_messages:
284
+ chat_name = get_chat_display_name(
285
+ st.session_state.current_dialogue_id,
286
+ current_messages[0]["query"]
287
+ )
288
+ else:
289
+ chat_name = "Новый диалог"
290
+
291
+ col1, col2 = st.columns([4, 1])
292
+ with col1:
293
+ st.title(f"💬 {chat_name}")
294
+ with col2:
295
+ if st.button("🗑️ Удалить чат", use_container_width=True):
296
+ if delete_chat(st.session_state.current_dialogue_id):
297
+ st.success("✅ Чат удален")
298
+
299
+ st.markdown("---")
300
+
301
+ # Chat messages container - rendered from the messages cached in session state
302
+ if not current_messages:
303
+ st.info("📝 Начните диалог, задав первый вопрос ниже")
304
+ else:
305
+ # Display all messages
306
+ for msg in current_messages:
307
+ # User message
308
+ with st.chat_message("user"):
309
+ st.markdown(msg["query"])
310
+ timestamp_str = msg.get("timestamp", "")
311
+ try:
312
+ dt = datetime.fromisoformat(timestamp_str)
313
+ st.caption(f"🕐 {dt.strftime('%H:%M:%S')}")
314
+ except:
315
+ pass
316
+
317
+ # Assistant message
318
+ with st.chat_message("assistant"):
319
+ st.markdown(msg["answer"])
320
+
321
+ # Show reasoning in expander
322
+ if msg.get("reason"):
323
+ with st.expander("📝 Обоснование"):
324
+ st.markdown(msg["reason"])
325
+
326
+ # Input area - fixed at the bottom via CSS
327
+ query = st.chat_input(
328
+ "Введите ваш вопрос...",
329
+ key="chat_input"
330
+ )
331
+
332
+ if query:
333
+ # Send message and get response
334
+ with st.spinner("🤔 Думаю..."):
335
+ result = send_message(query)
336
+
337
+
338
+
339
+ # --- Main App ---
340
+ def main():
341
+ st.set_page_config(
342
+ page_title="RAG Chat System",
343
+ page_icon="💬",
344
+ layout="wide",
345
+ initial_sidebar_state="expanded"
346
+ )
347
+
348
+ # Initialize session state FIRST (before any other operations)
349
+ init_session_state()
350
+
351
+ # Initialize needs_rerun flag if not exists
352
+ if "needs_rerun" not in st.session_state:
353
+ st.session_state.needs_rerun = False
354
+
355
+ # Initialize history table once using cache
356
+ init_db()
357
+
358
+ # Load chats list if not loaded yet
359
+ if not st.session_state.chat_list_loaded:
360
+ load_chats_list()
361
+
362
+ # Sidebar
363
+ with st.sidebar:
364
+ st.title("💬 RAG Chat")
365
+
366
+ # New chat button
367
+ if st.button("➕ Новый чат", use_container_width=True, type="primary"):
368
+ create_new_chat()
369
+
370
+ st.markdown("---")
371
+
372
+ # Chats list - use cached
373
+ col1, col2 = st.columns([3, 1])
374
+ with col1:
375
+ st.subheader("📝 Ваши чаты")
376
+ with col2:
377
+ if st.button("🔄", help="Обновить список чатов"):
378
+ st.session_state.chat_list_loaded = False
379
+ load_chats_list()
380
+
381
+ if not st.session_state.chat_list:
382
+ st.info("Нет чатов. Создайте новый!")
383
+ else:
384
+ # Display chats from cache
385
+ for chat in st.session_state.chat_list:
386
+ dialogue_id = chat["dialogue_id"]
387
+ message_count = chat.get("message_count", 0)
388
+ started_at = chat.get("started_at", "")
389
+
390
+ # Get chat name (only load history if chat has messages)
391
+ if message_count > 0:
392
+ history = get_history_by_dialogue(dialogue_id)
393
+ first_query = history[0]["query"] if history else None
394
+ else:
395
+ first_query = None
396
+ chat_name = get_chat_display_name(dialogue_id, first_query)
397
+
398
+ # Format time
399
+ try:
400
+ dt = datetime.fromisoformat(started_at)
401
+ time_str = dt.strftime('%d.%m %H:%M')
402
+ except:
403
+ time_str = ""
404
+
405
+ # Check if this is current chat
406
+ is_current = dialogue_id == st.session_state.current_dialogue_id
407
+
408
+ # Format button text with chat name and metadata
409
+ button_text = f"{'📌' if is_current else '💬'} {chat_name}\n💬 {message_count} • {time_str}"
410
+
411
+ if st.button(
412
+ button_text,
413
+ key=f"chat_{dialogue_id}",
414
+ use_container_width=True,
415
+ type="primary" if is_current else "secondary"
416
+ ):
417
+ switch_to_chat(dialogue_id)
418
+
419
+ # Handle rerun at the end if needed
420
+ if st.session_state.needs_rerun:
421
+ st.session_state.needs_rerun = False
422
+ st.rerun()
423
+
424
+ # Main content area
425
+ page_chat()
426
+
427
+
428
+ if __name__ == "__main__":
429
+ main()
frontend.py CHANGED
@@ -20,7 +20,7 @@ from src.db_utils.history_utils import (
20
  )
21
 
22
 
23
- # --- Инициализация RAG и БД ---
24
  @st.cache_resource(show_spinner=False)
25
  def get_rag():
26
  """Initialize RAG once and cache it"""
@@ -30,28 +30,19 @@ def get_rag():
30
  )
31
 
32
 
33
- @st.cache_resource(show_spinner=False)
34
- def init_db():
35
- """Initialize database once and cache it"""
36
- try:
37
- init_history_table()
38
- return True
39
- except Exception as e:
40
- st.error(f"⚠️ Не удалось инициализировать таблицу истории: {e}")
41
- return False
42
-
43
-
44
  # --- Session State Management ---
45
  def init_session_state():
46
- """Initialize session state with caching"""
47
  if "current_dialogue_id" not in st.session_state:
48
  st.session_state.current_dialogue_id = None
49
  if "chat_list" not in st.session_state:
50
  st.session_state.chat_list = []
51
  if "current_chat_messages" not in st.session_state:
52
  st.session_state.current_chat_messages = []
53
- if "chat_list_loaded" not in st.session_state:
54
- st.session_state.chat_list_loaded = False
 
 
55
 
56
 
57
  def generate_dialogue_id() -> str:
@@ -60,10 +51,14 @@ def generate_dialogue_id() -> str:
60
 
61
 
62
  def get_chat_display_name(dialogue_id: str, first_query: str = None) -> str:
63
- """Get display name for chat - always from DB, no caching"""
 
 
 
64
  if first_query:
65
  # Use first 40 chars of first query as name
66
  name = first_query[:40] + "..." if len(first_query) > 40 else first_query
 
67
  return name
68
 
69
  return "Новый диалог"
@@ -72,10 +67,15 @@ def get_chat_display_name(dialogue_id: str, first_query: str = None) -> str:
72
  # --- Chat Management Functions ---
73
 
74
  def load_chats_list():
75
- """Load and cache chats list from DB"""
76
  try:
77
- st.session_state.chat_list = get_recent_dialogues(limit=50)
78
- st.session_state.chat_list_loaded = True
 
 
 
 
 
79
  except Exception as e:
80
  st.error(f"❌ Ошибка при загрузке чатов: {e}")
81
  st.session_state.chat_list = []
@@ -86,53 +86,38 @@ def create_new_chat():
86
  new_id = generate_dialogue_id()
87
  st.session_state.current_dialogue_id = new_id
88
  st.session_state.current_chat_messages = []
89
- st.session_state.needs_rerun = True
90
  return new_id
91
 
92
 
93
  def switch_to_chat(dialogue_id: str):
94
- """Switch to an existing chat and load its messages"""
95
  st.session_state.current_dialogue_id = dialogue_id
96
- load_current_chat_messages()
97
- st.session_state.needs_rerun = True
98
 
99
 
100
- def load_current_chat_messages():
101
- """Load messages for current chat from DB and cache"""
102
- if not st.session_state.current_dialogue_id:
103
- st.session_state.current_chat_messages = []
104
- return
105
-
106
  try:
107
- st.session_state.current_chat_messages = get_history_by_dialogue(
108
- st.session_state.current_dialogue_id
109
- )
110
  except Exception as e:
111
  st.error(f"❌ Ошибка при загрузке сообщений: {e}")
112
  st.session_state.current_chat_messages = []
113
 
114
 
115
- def get_current_chat_messages() -> List[Dict]:
116
- """Get cached messages for current chat"""
117
- return st.session_state.current_chat_messages
118
-
119
-
120
  def send_message(query: str) -> Optional[Dict]:
121
- """Send a message in current chat and update cache"""
122
  try:
123
  if not st.session_state.current_dialogue_id:
124
  create_new_chat()
125
 
126
- # Get RAG and invoke with cached history
127
  rag = get_rag()
128
 
129
- # Use cached messages
130
- current_history = get_current_chat_messages()
131
 
132
- # Pass history to RAG (it will use last N messages internally for enrichment)
133
- result = rag.invoke(query, history=current_history)
134
-
135
- # Log to history DB
136
  query_id = log_query(
137
  query=query,
138
  answer=result.get("answer", ""),
@@ -142,12 +127,11 @@ def send_message(query: str) -> Optional[Dict]:
142
 
143
  result["query_id"] = query_id
144
 
145
- # Update only current messages, not all chats
146
- load_current_chat_messages()
147
 
148
- # Mark that we need to refresh chat list (but don't do it immediately)
149
- st.session_state.chat_list_loaded = False
150
- st.session_state.needs_rerun = True
151
 
152
  return result
153
  except Exception as e:
@@ -156,18 +140,17 @@ def send_message(query: str) -> Optional[Dict]:
156
 
157
 
158
  def delete_chat(dialogue_id: str) -> bool:
159
- """Delete a chat from DB and update cache"""
160
  try:
161
  delete_history(dialogue_id=dialogue_id)
162
 
163
- # If deleted current chat, clear selection
164
  if st.session_state.current_dialogue_id == dialogue_id:
165
  st.session_state.current_dialogue_id = None
166
  st.session_state.current_chat_messages = []
167
 
168
- # Mark that we need to reload chat list
169
- st.session_state.chat_list_loaded = False
170
- st.session_state.needs_rerun = True
171
 
172
  return True
173
  except Exception as e:
@@ -273,17 +256,15 @@ def page_chat():
273
 
274
  if st.button("🆕 Начать новый чат", type="primary", use_container_width=True):
275
  create_new_chat()
 
276
 
277
  return
278
 
279
- # Get cached messages
280
- current_messages = get_current_chat_messages()
281
-
282
  # Display chat header
283
- if current_messages:
284
  chat_name = get_chat_display_name(
285
  st.session_state.current_dialogue_id,
286
- current_messages[0]["query"]
287
  )
288
  else:
289
  chat_name = "Новый диалог"
@@ -295,15 +276,16 @@ def page_chat():
295
  if st.button("🗑️ Удалить чат", use_container_width=True):
296
  if delete_chat(st.session_state.current_dialogue_id):
297
  st.success("✅ Чат удален")
 
298
 
299
  st.markdown("---")
300
 
301
- # Chat messages container - load from DB
302
- if not current_messages:
303
  st.info("📝 Начните диалог, задав первый вопрос ниже")
304
  else:
305
  # Display all messages
306
- for msg in current_messages:
307
  # User message
308
  with st.chat_message("user"):
309
  st.markdown(msg["query"])
@@ -333,6 +315,9 @@ def page_chat():
333
  # Send message and get response
334
  with st.spinner("🤔 Думаю..."):
335
  result = send_message(query)
 
 
 
336
 
337
 
338
 
@@ -345,18 +330,17 @@ def main():
345
  initial_sidebar_state="expanded"
346
  )
347
 
348
- # Initialize session state FIRST (before any other operations)
349
- init_session_state()
350
-
351
- # Initialize needs_rerun flag if not exists
352
- if "needs_rerun" not in st.session_state:
353
- st.session_state.needs_rerun = False
354
 
355
- # Initialize history table once using cache
356
- init_db()
357
 
358
- # Load chats list if not loaded yet
359
- if not st.session_state.chat_list_loaded:
360
  load_chats_list()
361
 
362
  # Sidebar
@@ -366,22 +350,17 @@ def main():
366
  # New chat button
367
  if st.button("➕ Новый чат", use_container_width=True, type="primary"):
368
  create_new_chat()
 
369
 
370
  st.markdown("---")
371
 
372
- # Chats list - use cached
373
- col1, col2 = st.columns([3, 1])
374
- with col1:
375
- st.subheader("📝 Ваши чаты")
376
- with col2:
377
- if st.button("🔄", help="Обновить список чатов"):
378
- st.session_state.chat_list_loaded = False
379
- load_chats_list()
380
 
381
  if not st.session_state.chat_list:
382
  st.info("Нет чатов. Создайте новый!")
383
  else:
384
- # Display chats from cache
385
  for chat in st.session_state.chat_list:
386
  dialogue_id = chat["dialogue_id"]
387
  message_count = chat.get("message_count", 0)
@@ -415,11 +394,7 @@ def main():
415
  type="primary" if is_current else "secondary"
416
  ):
417
  switch_to_chat(dialogue_id)
418
-
419
- # Handle rerun at the end if needed
420
- if st.session_state.needs_rerun:
421
- st.session_state.needs_rerun = False
422
- st.rerun()
423
 
424
  # Main content area
425
  page_chat()
 
20
  )
21
 
22
 
23
+ # --- Инициализация RAG ---
24
  @st.cache_resource(show_spinner=False)
25
  def get_rag():
26
  """Initialize RAG once and cache it"""
 
30
  )
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
33
  # --- Session State Management ---
34
  def init_session_state():
35
+ """Initialize session state variables for chat support"""
36
  if "current_dialogue_id" not in st.session_state:
37
  st.session_state.current_dialogue_id = None
38
  if "chat_list" not in st.session_state:
39
  st.session_state.chat_list = []
40
  if "current_chat_messages" not in st.session_state:
41
  st.session_state.current_chat_messages = []
42
+ if "chat_names" not in st.session_state:
43
+ st.session_state.chat_names = {} # {dialogue_id: custom_name}
44
+ if "chats_loaded" not in st.session_state:
45
+ st.session_state.chats_loaded = False
46
 
47
 
48
  def generate_dialogue_id() -> str:
 
51
 
52
 
53
  def get_chat_display_name(dialogue_id: str, first_query: str = None) -> str:
54
+ """Get display name for chat"""
55
+ if dialogue_id in st.session_state.chat_names:
56
+ return st.session_state.chat_names[dialogue_id]
57
+
58
  if first_query:
59
  # Use first 40 chars of first query as name
60
  name = first_query[:40] + "..." if len(first_query) > 40 else first_query
61
+ st.session_state.chat_names[dialogue_id] = name
62
  return name
63
 
64
  return "Новый диалог"
 
67
  # --- Chat Management Functions ---
68
 
69
  def load_chats_list():
70
+ """Load all available chats from database"""
71
  try:
72
+ dialogues = get_recent_dialogues(limit=50)
73
+ st.session_state.chat_list = dialogues
74
+ st.session_state.chats_loaded = True
75
+
76
+ # If no current chat selected and chats exist, select the first one
77
+ if not st.session_state.current_dialogue_id and dialogues:
78
+ switch_to_chat(dialogues[0]["dialogue_id"])
79
  except Exception as e:
80
  st.error(f"❌ Ошибка при загрузке чатов: {e}")
81
  st.session_state.chat_list = []
 
86
  new_id = generate_dialogue_id()
87
  st.session_state.current_dialogue_id = new_id
88
  st.session_state.current_chat_messages = []
 
89
  return new_id
90
 
91
 
92
  def switch_to_chat(dialogue_id: str):
93
+ """Switch to an existing chat"""
94
  st.session_state.current_dialogue_id = dialogue_id
95
+ load_chat_messages(dialogue_id)
 
96
 
97
 
98
+ def load_chat_messages(dialogue_id: str):
99
+ """Load messages for a specific chat"""
 
 
 
 
100
  try:
101
+ history = get_history_by_dialogue(dialogue_id)
102
+ st.session_state.current_chat_messages = history
 
103
  except Exception as e:
104
  st.error(f"❌ Ошибка при загрузке сообщений: {e}")
105
  st.session_state.current_chat_messages = []
106
 
107
 
 
 
 
 
 
108
  def send_message(query: str) -> Optional[Dict]:
109
+ """Send a message in current chat"""
110
  try:
111
  if not st.session_state.current_dialogue_id:
112
  create_new_chat()
113
 
114
+ # Get RAG and invoke with history
115
  rag = get_rag()
116
 
117
+ # Pass current chat history to RAG (it will use last N messages internally for enrichment)
118
+ result = rag.invoke(query, history=st.session_state.current_chat_messages)
119
 
120
+ # Log to history
 
 
 
121
  query_id = log_query(
122
  query=query,
123
  answer=result.get("answer", ""),
 
127
 
128
  result["query_id"] = query_id
129
 
130
+ # Update current chat messages
131
+ load_chat_messages(st.session_state.current_dialogue_id)
132
 
133
+ # Reload chats list to update
134
+ load_chats_list()
 
135
 
136
  return result
137
  except Exception as e:
 
140
 
141
 
142
  def delete_chat(dialogue_id: str) -> bool:
143
+ """Delete a chat"""
144
  try:
145
  delete_history(dialogue_id=dialogue_id)
146
 
147
+ # If deleted current chat, switch to another or create new
148
  if st.session_state.current_dialogue_id == dialogue_id:
149
  st.session_state.current_dialogue_id = None
150
  st.session_state.current_chat_messages = []
151
 
152
+ # Reload chats
153
+ load_chats_list()
 
154
 
155
  return True
156
  except Exception as e:
 
256
 
257
  if st.button("🆕 Начать новый чат", type="primary", use_container_width=True):
258
  create_new_chat()
259
+ st.rerun()
260
 
261
  return
262
 
 
 
 
263
  # Display chat header
264
+ if st.session_state.current_chat_messages:
265
  chat_name = get_chat_display_name(
266
  st.session_state.current_dialogue_id,
267
+ st.session_state.current_chat_messages[0]["query"]
268
  )
269
  else:
270
  chat_name = "Новый диалог"
 
276
  if st.button("🗑️ Удалить чат", use_container_width=True):
277
  if delete_chat(st.session_state.current_dialogue_id):
278
  st.success("✅ Чат удален")
279
+ st.rerun()
280
 
281
  st.markdown("---")
282
 
283
+ # Chat messages container
284
+ if not st.session_state.current_chat_messages:
285
  st.info("📝 Начните диалог, задав первый вопрос ниже")
286
  else:
287
  # Display all messages
288
+ for msg in st.session_state.current_chat_messages:
289
  # User message
290
  with st.chat_message("user"):
291
  st.markdown(msg["query"])
 
315
  # Send message and get response
316
  with st.spinner("🤔 Думаю..."):
317
  result = send_message(query)
318
+
319
+ if result:
320
+ st.rerun()
321
 
322
 
323
 
 
330
  initial_sidebar_state="expanded"
331
  )
332
 
333
+ # Initialize history table on startup
334
+ try:
335
+ init_history_table()
336
+ except Exception as e:
337
+ st.error(f"⚠️ Не удалось инициализировать таблицу истории: {e}")
 
338
 
339
+ # Initialize session state
340
+ init_session_state()
341
 
342
+ # Load chats list if not loaded
343
+ if not st.session_state.chats_loaded:
344
  load_chats_list()
345
 
346
  # Sidebar
 
350
  # New chat button
351
  if st.button("➕ Новый чат", use_container_width=True, type="primary"):
352
  create_new_chat()
353
+ st.rerun()
354
 
355
  st.markdown("---")
356
 
357
+ # Chats list
358
+ st.subheader("📝 Ваши чаты")
 
 
 
 
 
 
359
 
360
  if not st.session_state.chat_list:
361
  st.info("Нет чатов. Создайте новый!")
362
  else:
363
+ # Display chats
364
  for chat in st.session_state.chat_list:
365
  dialogue_id = chat["dialogue_id"]
366
  message_count = chat.get("message_count", 0)
 
394
  type="primary" if is_current else "secondary"
395
  ):
396
  switch_to_chat(dialogue_id)
397
+ st.rerun()
 
 
 
 
398
 
399
  # Main content area
400
  page_chat()
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  pandas==2.3.3
 
2
  pyaes==1.6.1
3
  Pyrogram==2.0.106
4
  PySocks==1.7.1
@@ -24,6 +25,8 @@ langchain-openai==1.0.0
24
  sqlalchemy==2.0.44
25
  psycopg2-binary==2.9.11
26
  qdrant-client==1.16.2
 
 
27
  openai==1.109.1
28
  pydantic==2.9.2
29
  tenacity==9.0.0
 
1
  pandas==2.3.3
2
+ python-dotenv==1.2.1
3
  pyaes==1.6.1
4
  Pyrogram==2.0.106
5
  PySocks==1.7.1
 
25
  sqlalchemy==2.0.44
26
  psycopg2-binary==2.9.11
27
  qdrant-client==1.16.2
28
+ fastapi==0.124.4
29
+ uvicorn==0.38.0
30
  openai==1.109.1
31
  pydantic==2.9.2
32
  tenacity==9.0.0
server.py CHANGED
@@ -46,7 +46,8 @@ rag = RAG(
46
 
47
  class QueryRequest(BaseModel):
48
  query: str
49
- dialogue_id: Optional[str] = None # Для будущей поддержки диалогов
 
50
 
51
 
52
  class QueryResponse(BaseModel):
@@ -86,8 +87,15 @@ class DialogueInfo(BaseModel):
86
  def rag_query(request: QueryRequest):
87
  """Основной endpoint для запросов к RAG. Логирует запрос после получения ответа."""
88
 
89
- # Получаем ответ от RAG
90
- result = rag.invoke(request.query)
 
 
 
 
 
 
 
91
 
92
  # Логируем в историю
93
  query_id = log_query(
 
46
 
47
  class QueryRequest(BaseModel):
48
  query: str
49
+ dialogue_id: Optional[str] = None
50
+ history: Optional[List[Dict[str, Any]]] = None # История диалога для контекста
51
 
52
 
53
  class QueryResponse(BaseModel):
 
87
  def rag_query(request: QueryRequest):
88
  """Основной endpoint для запросов к RAG. Логирует запрос после получения ответа."""
89
 
90
+ # Если передан dialogue_id, загружаем историю
91
+ history = None
92
+ if request.dialogue_id and not request.history:
93
+ history = get_history_by_dialogue(request.dialogue_id)
94
+ elif request.history:
95
+ history = request.history
96
+
97
+ # Получаем ответ от RAG с историей (история используется для обогащения вопроса)
98
+ result = rag.invoke(request.query, history=history)
99
 
100
  # Логируем в историю
101
  query_id = log_query(
src/parser/__init__.py DELETED
File without changes
src/parser/pyrosource.py DELETED
@@ -1,64 +0,0 @@
1
- import time
2
- from typing import Union, Generator, List, Dict, Any
3
-
4
- from pyrogram import Client
5
- from pyrogram.types import Message
6
-
7
-
8
- class PyroSource:
9
-
10
- def __init__(
11
- self,
12
- api_id: Union[int, str],
13
- api_hash: str,
14
- app_name: str = "default_app",
15
- ):
16
- self.client = Client(name=app_name, api_id=api_id, api_hash=api_hash)
17
-
18
-
19
- def load_messages(
20
- self,
21
- channel_id: Union[int, str],
22
- limit: int,
23
- offset: int = 0,
24
- offset_id: int = 0,
25
- time_sleep: float = 0.05,
26
- ) -> List[Dict[str, Any]]:
27
- """
28
- channel_id: channel id or username
29
- limit: number of messages to load
30
- offset: offset index
31
- offset_id: message id offset
32
- """
33
- posts = []
34
-
35
- with self.client as app:
36
- messages: Generator[Message] = app.get_chat_history(
37
- chat_id=channel_id,
38
- limit=limit,
39
- offset=offset,
40
- offset_id=offset_id,
41
- )
42
-
43
- for msg in messages:
44
- time.sleep(time_sleep)
45
-
46
- content = msg.text or msg.caption or ''
47
- original_author = (
48
- msg.forward_from_chat.username if msg.forward_from_chat else ''
49
- )
50
- message_dt = msg.date.strftime("%Y-%m-%d")
51
-
52
-
53
- meta = {
54
- "message_dt" : message_dt,
55
- "message_id" : msg.id,
56
- "channel_id" : channel_id,
57
- "content" : content,
58
- "views" : msg.views,
59
- "original_author" : original_author,
60
- }
61
-
62
- posts.append(meta)
63
-
64
- return posts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/parser/run_loader.py DELETED
@@ -1,62 +0,0 @@
1
- import os
2
- import argparse
3
-
4
- from structlog import get_logger
5
- import pandas as pd
6
-
7
- from src.config import pyro_source
8
-
9
-
10
- BATCH_SIZE = 256
11
- logger = get_logger()
12
-
13
-
14
- def save_batch(df: pd.DataFrame, out_path: str, is_first_batch: bool):
15
- if is_first_batch:
16
- df.to_csv(out_path, index=False, mode="w")
17
- else:
18
- df.to_csv(out_path, index=False, mode="a", header=False)
19
-
20
-
21
- def main():
22
- parser = argparse.ArgumentParser(description="Telegram posts loader")
23
-
24
- parser.add_argument("--channel_id", type=str, required=True)
25
- parser.add_argument("--limit", type=int, required=True)
26
- parser.add_argument("--offset", type=int, default=0)
27
-
28
- args = parser.parse_args()
29
- total_limit = args.limit
30
- channel_id = args.channel_id
31
- base_offset = args.offset
32
-
33
-
34
- out_path = f"./channel_{channel_id}_posts.csv"
35
- is_first_batch = not os.path.exists(out_path)
36
-
37
-
38
- total_batches = (total_limit + BATCH_SIZE - 1) // BATCH_SIZE
39
-
40
- for batch_num in range(total_batches):
41
- logger.info(f"Batch #{batch_num} loading")
42
-
43
- current_offset = base_offset + batch_num * BATCH_SIZE
44
-
45
- posts = pyro_source.load_messages(
46
- channel_id=channel_id,
47
- limit=BATCH_SIZE,
48
- offset=current_offset
49
- )
50
-
51
- df = pd.DataFrame(posts)
52
- save_batch(df, out_path, is_first_batch)
53
- is_first_batch = False
54
-
55
-
56
- logger.info("Finished loading")
57
-
58
-
59
-
60
-
61
- if __name__ == "__main__":
62
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/rag/__pycache__/rag.cpython-313.pyc CHANGED
Binary files a/src/rag/__pycache__/rag.cpython-313.pyc and b/src/rag/__pycache__/rag.cpython-313.pyc differ
 
src/rag/rag.py CHANGED
@@ -26,6 +26,19 @@ class RAG:
26
  self.llm = get_model(LLM_API_KEY, LLM)
27
  self.history_length = CHAT_HISTORY_LENGTH
28
  self.enable_enrichment = ENABLE_QUESTION_ENRICHMENT
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Initialize question enricher if enabled
31
  if self.enable_enrichment:
@@ -53,23 +66,9 @@ class RAG:
53
  # Get context from retriever using enriched query
54
  context = self.retriever.chain.invoke(enriched_query)
55
 
56
- # Build prompt without history (enriched question already contains context)
57
- prompt = ChatPromptTemplate.from_messages([
58
- SystemMessagePromptTemplate.from_template(
59
- "Ты полезный и точный ассистент. "
60
- "Ответь на вопрос, опираясь ТОЛЬКО на предложенный контекст. "
61
- "Если в контексте нет ответа, ответь \"Не знаю.\""
62
- ),
63
- HumanMessagePromptTemplate.from_template(
64
- "{format_instructions}\n\n"
65
- "Контекст:\n{context}\n\n"
66
- "Вопрос: {question}"
67
- ),
68
- ])
69
-
70
  # Build chain
71
  chain = (
72
- prompt
73
  | self.llm
74
  | self.parser
75
  )
 
26
  self.llm = get_model(LLM_API_KEY, LLM)
27
  self.history_length = CHAT_HISTORY_LENGTH
28
  self.enable_enrichment = ENABLE_QUESTION_ENRICHMENT
29
+
30
+ self.prompt = ChatPromptTemplate.from_messages([
31
+ SystemMessagePromptTemplate.from_template(
32
+ "Ты полезный и точный ассистент. "
33
+ "Ответь на вопрос, опираясь ТОЛЬКО на предложенный контекст. "
34
+ "Если в контексте нет ответа, ответь \"Не знаю.\""
35
+ ),
36
+ HumanMessagePromptTemplate.from_template(
37
+ "{format_instructions}\n\n"
38
+ "Контекст:\n{context}\n\n"
39
+ "Вопрос: {question}"
40
+ ),
41
+ ])
42
 
43
  # Initialize question enricher if enabled
44
  if self.enable_enrichment:
 
66
  # Get context from retriever using enriched query
67
  context = self.retriever.chain.invoke(enriched_query)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # Build chain
70
  chain = (
71
+ self.prompt
72
  | self.llm
73
  | self.parser
74
  )
src/scripts/1_load_texts.py DELETED
@@ -1,84 +0,0 @@
1
- import pandas as pd
2
-
3
- from src.db_utils.sql_utils import sql_drop, sql_dump_df
4
-
5
-
6
-
7
- def strip_edges_allow_punct(s: str):
8
- allowed_punct = set(".,!?;:-–—") # можно расширять
9
-
10
- # Левый указатель — пока не буква/цифра
11
- left = 0
12
- while left < len(s) and not s[left].isalnum():
13
- left += 1
14
-
15
- # Правый указатель — пока не буква/цифра/пунктуация
16
- right = len(s) - 1
17
- while right >= 0 and not (s[right].isalnum() or s[right] in allowed_punct):
18
- right -= 1
19
-
20
- # Если всё мусор
21
- if right < left:
22
- return ""
23
-
24
- return s[left:right+1]
25
-
26
-
27
- def process_str(s: str):
28
- # Чистка статьи от мусора
29
- s = "\n".join(strip_edges_allow_punct(p) for p in s.split("\n") if p)
30
-
31
- for suf in [
32
- "Слушать прямой эфир",
33
- "Читать РБК Стиль в Telegram",
34
- "РБК Events, 18",
35
- "Подписаться | Онлайн-сомелье",
36
- "Читать РБК в Telegram",
37
- "Следить за новостями РБК в Telegram",
38
- "Следить за новостями РБК в МАХ",
39
- "Другие видео этого дня — в телеграм-канале РБК",
40
- "РБК в Telegram и MAX",
41
- "РБК в Telegram | MAX",
42
- "Подписаться на «РБК Спорт",
43
- "Картина дня — в телеграм-канале РБК",
44
- "Самые важные новости — в канале РБК в МАХ",
45
- "Больше инфографики — в телеграм-канале РБК",
46
- "Подписаться на «Сам ты инвестор!",
47
- "Читать РБК Недвижимость в Telegram"
48
- ]:
49
- s = s.removesuffix(suf).strip()
50
-
51
- parts = [p for p in s.split("\n") if p]
52
-
53
- prev_parts = [0] * 1000
54
- while len(prev_parts) != len(parts) and len(parts) != 0:
55
- prev_parts = parts
56
- if "Фото:" in parts[-1] or "Данные:" in parts[-1]:
57
- parts = parts[:-1]
58
-
59
- return "\n".join(parts)
60
-
61
-
62
- def is_advertisement(s: str):
63
- # Проверка рекламных объявлений
64
- last_part = [p for p in s.split("\n") if p][-1]
65
- return any(v in last_part for v in ["Реклама.", "Реклама,"])
66
-
67
-
68
-
69
-
70
- if __name__ == "__main__":
71
- # Предобработка документов
72
- rbc = pd.read_csv("src/dataset/rbc/channel_rbc_news_posts.csv")
73
-
74
- rbc["message_dt"] = pd.to_datetime(rbc["message_dt"]).dt.date
75
- rbc["content"] = rbc["content"].apply(lambda x: process_str(x))
76
- rbc["views"] = rbc["views"].astype(int)
77
-
78
- rbc = rbc[~rbc["content"].apply(is_advertisement)]
79
- rbc = rbc[["message_id", "channel_id", "message_dt", "views", "content"]]
80
-
81
- # Загрузка в бд
82
- table = "posts"
83
- sql_drop(table)
84
- sql_dump_df(rbc, table)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/scripts/2_load_vectors.py DELETED
@@ -1,51 +0,0 @@
1
- import time
2
-
3
- import pandas as pd
4
- from langchain_huggingface.embeddings import HuggingFaceEmbeddings
5
-
6
- from src.db_utils.sql_utils import sql_fetch_batch
7
- from src.db_utils.qdrant_utils import qdrant_insert, qdrant_create_index
8
- from src.data.splitter import Splitter
9
-
10
-
11
-
12
-
13
- if __name__ == "__main__":
14
- splitter_mode = "recursive"
15
- model_name = "deepvk/USER-bge-m3"
16
- vector_index_name = f"{splitter_mode}_{model_name.split('/')[1]}"
17
-
18
- # Инициализация объектов
19
- splitter = Splitter(splitter_mode, chunk_size=256, chunk_overlap=64)
20
- emb = HuggingFaceEmbeddings(
21
- model_name=model_name,
22
- encode_kwargs={"normalize_embeddings": True},
23
- )
24
-
25
- # Создание индекса
26
- qdrant_create_index(
27
- index_name=vector_index_name,
28
- dim=len(emb.embed_documents(["None"])[0]),
29
- distance="cosine",
30
- )
31
-
32
- # Загрузка документов батчами
33
- batch_size = 16
34
- offset = 0
35
- while True:
36
- rows = sql_fetch_batch(batch_size=batch_size, offset=offset)
37
- if not rows:
38
- break # дошли до конца
39
-
40
- dfs = []
41
- for r in rows:
42
- chunks = splitter.split_text(r["content"])
43
- vectors = emb.embed_documents(chunks)
44
-
45
- dfs.append(pd.DataFrame({"doc_id": r["ctid"], "text": chunks, "vector": vectors}))
46
-
47
- print(f"{offset} - {offset + batch_size}:", qdrant_insert(pd.concat(dfs), vector_index_name))
48
-
49
- offset += batch_size
50
-
51
- time.sleep(0.3)