MrSimple07 committed on
Commit
c28dd72
·
1 Parent(s): b395a0b

api key added

Browse files
Files changed (1) hide show
  1. utils.py +265 -232
utils.py CHANGED
@@ -1,21 +1,41 @@
 
 
1
  from llama_index.llms.google_genai import GoogleGenAI
 
2
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
3
  from sentence_transformers import CrossEncoder
 
 
 
4
  from my_logging import log_message
5
- import os
6
 
7
# Fail fast at import time: without GOOGLE_API_KEY the problem is logged and
# a ValueError aborts the import.
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    _missing_key_message = "GOOGLE_API_KEY not found in environment"
    log_message(_missing_key_message)
    raise ValueError(_missing_key_message)
11
-
12
-
13
def get_llm_model(model_name="gemini-2.5-flash"):
    """Create a ``GoogleGenAI`` client for ``model_name``.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable on
    every call.

    Raises:
        ValueError: if the variable is unset or empty (the problem is logged
            before raising).
    """
    key = os.getenv('GOOGLE_API_KEY')
    if key:
        return GoogleGenAI(model=model_name, api_key=key)

    log_message("GOOGLE_API_KEY not found in environment")
    raise ValueError("GOOGLE_API_KEY not found in environment")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
    """Return a ``HuggingFaceEmbedding`` constructed for ``model_name``."""
    embedding_model = HuggingFaceEmbedding(model_name=model_name)
    return embedding_model
@@ -23,250 +43,263 @@ def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingua
23
def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
    """Return a ``CrossEncoder`` constructed for ``model_name``."""
    reranker = CrossEncoder(model_name)
    return reranker
25
 
26
def format_sources(nodes):
    """Build a newline-separated list of unique source labels.

    Each node contributes one label derived from ``node.metadata``:
    tables are labelled by document id, table number and title, images by
    document id and image number, everything else by document id and
    section id.

    Args:
        nodes: iterable of objects exposing a ``metadata`` mapping.

    Returns:
        The distinct labels joined with ``"\\n"``, in first-seen order
        (an empty string when ``nodes`` is empty).
    """
    labels = []

    for node in nodes:
        meta = node.metadata
        doc_type = meta.get('type', 'text')
        doc_id = meta.get('document_id', 'unknown')

        if doc_type == 'table':
            table_num = meta.get('table_number', 'unknown')
            title = meta.get('table_title', '')
            labels.append(f"📊 {doc_id} - Таблица {table_num}: {title}")
        elif doc_type == 'image':
            img_num = meta.get('image_number', 'unknown')
            labels.append(f"🖼️ {doc_id} - Рисунок {img_num}")
        else:
            section = meta.get('section_id', '')
            labels.append(f"📄 {doc_id} - Раздел {section}")

    # dict.fromkeys drops duplicates but keeps insertion order; a set would
    # make the order of the listed sources change from run to run.
    return "\n".join(dict.fromkeys(labels))
45
-
46
def create_chunks_info_for_display(nodes):
    """Convert nodes into plain dicts for the chunk display.

    Each dict copies selected keys from ``node.metadata`` (with defaults
    for missing ones) and adds ``chunk_text``, the first 500 characters of
    ``node.text``.
    """
    def summarize(node):
        meta = node.metadata
        return {
            'document_id': meta.get('document_id', 'unknown'),
            'section_path': meta.get('section_path', ''),
            'section_id': meta.get('section_id', 'unknown'),
            'section_text': meta.get('section_text', ''),
            'parent_section': meta.get('parent_section', ''),
            'parent_title': meta.get('parent_title', ''),
            'level': meta.get('level', ''),
            'chunk_text': node.text[:500],
            'type': meta.get('type', 'text'),
            'table_number': meta.get('table_number', ''),
            'image_number': meta.get('image_number', ''),
        }

    return [summarize(node) for node in nodes]
65
 
66
def format_answer_html(answer_text, model_name):
    """Wrap the answer in a dark-themed HTML card.

    The card shows a "Модель: <model_name>" badge followed by
    ``answer_text``. Both values are inserted into the markup as given.
    """
    return f"""
    <div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px;'>
        <div style='margin-bottom: 10px;'>
            <span style='background-color: #4a5568; padding: 5px 10px; border-radius: 5px; font-size: 12px;'>
                Модель: {model_name}
            </span>
        </div>
        <div style='line-height: 1.6;'>
            {answer_text}
        </div>
    </div>
    """
80
 
81
def format_sources_html(sources_text):
    """Render newline-separated source labels as an HTML panel.

    Returns a "Нет источников" placeholder panel when ``sources_text`` is
    empty or falsy. Otherwise every non-blank line becomes one row under
    the "Использованные источники:" heading.
    """
    if not sources_text:
        return "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Нет источников</div>"

    rows = [
        f"<div style='padding: 5px 0; border-bottom: 1px solid #4a5568;'>{line}</div>"
        for line in sources_text.strip().split('\n')
        if line.strip()
    ]

    return (
        "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px;'>"
        "<h4 style='color: white; margin-bottom: 15px;'>Использованные источники:</h4>"
        "<div style='line-height: 2;'>"
        + "".join(rows)
        + "</div></div>"
    )
 
 
 
 
 
 
 
 
96
 
97
def format_chunks_html(chunks_info):
    """Render chunk dicts as a scrollable HTML panel.

    Returns a "Нет данных о чанках" placeholder when ``chunks_info`` is
    empty or falsy. Otherwise the panel starts with the number of chunks
    and shows one card per chunk, with alternating background colours,
    containing the document id, the section label from
    ``app.get_section_display`` and the content from
    ``app.get_formatted_content``.
    """
    if not chunks_info:
        return "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Нет данных о чанках</div>"

    # Imported at call time rather than at module level — presumably to
    # avoid a circular import with app; TODO confirm.
    from app import get_section_display, get_formatted_content

    pieces = [
        "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 500px; overflow-y: auto;'>",
        f"<h4 style='color: white; margin-bottom: 15px;'>Найдено релевантных чанков: {len(chunks_info)}</h4>",
    ]

    for index, chunk in enumerate(chunks_info):
        # Even rows use the lighter shade, odd rows the darker one.
        bg_color = "#374151" if index % 2 else "#4a5568"
        section_display = get_section_display(chunk)
        formatted_content = get_formatted_content(chunk)

        pieces.append(f"""
        <div style='background-color: {bg_color}; padding: 10px; margin: 5px 0; border-radius: 5px; border-left: 4px solid #60a5fa;'>
            <strong style='color: #93c5fd;'>Документ:</strong> <span style='color: white;'>{chunk['document_id']}</span><br>
            <strong style='color: #93c5fd;'>Раздел:</strong> <span style='color: white;'>{section_display}</span><br>
            <strong style='color: #93c5fd;'>Содержание:</strong><br>
            <div style='background-color: #1f2937; padding: 8px; margin-top: 5px; border-radius: 3px; font-family: monospace; font-size: 12px; color: #d1d5db; max-height: 200px; overflow-y: auto;'>
                {formatted_content}
            </div>
        </div>
        """)

    pieces.append("</div>")
    return "".join(pieces)
124
 
125
def deduplicate_nodes(nodes):
    """Drop repeated nodes, keeping the first occurrence of each.

    Two nodes count as the same when their metadata yields the same
    identifier: document id + table number + chunk id for tables,
    document id + image number for images, and document id + section id
    + chunk id for everything else.
    """
    def fingerprint(node):
        meta = node.metadata
        doc_id = meta.get('document_id', '')
        kind = meta.get('type', 'text')

        if kind == 'image':
            return f"{doc_id}|image|{meta.get('image_number', '')}"

        chunk_id = meta.get('chunk_id', 0)
        if kind == 'table':
            return f"{doc_id}|table|{meta.get('table_number', '')}|{chunk_id}"
        return f"{doc_id}|{meta.get('section_id', '')}|{chunk_id}"

    # A dict keeps insertion order, and setdefault keeps the first node
    # seen for each identifier.
    first_seen = {}
    for node in nodes:
        first_seen.setdefault(fingerprint(node), node)
    return list(first_seen.values())
151
-
152
-
153
def _group_nodes_by_document(nodes):
    """Bucket nodes per document id into 'tables', 'text' and 'images' lists."""
    doc_groups = {}
    for node in nodes:
        doc_id = node.metadata.get('document_id', 'unknown')
        buckets = doc_groups.setdefault(doc_id, {'tables': [], 'text': [], 'images': []})

        node_type = node.metadata.get('type', 'text')
        if node_type == 'table':
            buckets['tables'].append(node)
        elif node_type == 'image':
            buckets['images'].append(node)
        else:
            buckets['text'].append(node)
    return doc_groups


def _render_document_context(doc_id, groups):
    """Render one document's tables, images and text as a prompt section."""
    doc_section = [f"=== ДОКУМЕНТ: {doc_id} ==="]

    if groups['tables']:
        doc_section.append("\n--- ТАБЛИЦЫ ---")
        for node in groups['tables']:
            meta = node.metadata
            table_id = meta.get('table_identifier', meta.get('table_number', 'unknown'))
            title = meta.get('table_title', '')
            doc_section.append(f"\n[Таблица {table_id}] {title}")
            doc_section.append(node.text[:1500])
            log_message(f" Included table {table_id} from {doc_id}")

    if groups['images']:
        doc_section.append("\n--- ИЗОБРАЖЕНИЯ ---")
        for node in groups['images']:
            img_id = node.metadata.get('image_number', 'unknown')
            doc_section.append(f"\n[Рисунок {img_id}]")
            doc_section.append(node.text[:1000])
            log_message(f" Included image {img_id} from {doc_id}")

    if groups['text']:
        doc_section.append("\n--- ТЕКСТ ---")
        # Only the first three text chunks of a document are included.
        for node in groups['text'][:3]:
            doc_section.append(node.text[:800])
            log_message(f" Included text section from {doc_id}")

    return "\n".join(doc_section)


def answer_question(question, query_engine, reranker, model_name):
    """Answer ``question`` from retrieved context and return three HTML panels.

    Steps: retrieve nodes with ``query_engine.retrieve``, de-duplicate
    them, rerank with ``rerank_nodes``, group the survivors per document
    into a context string, fill ``config.CUSTOM_PROMPT`` and complete it
    with ``Settings.llm``.

    Args:
        question: the user's question.
        query_engine: object exposing ``retrieve(question)``.
        reranker: passed through to ``rerank_nodes``.
        model_name: label shown in the answer panel.

    Returns:
        ``(answer_html, sources_html, chunks_html)``. Any exception is
        logged with its traceback and turned into three error panels, so
        this function does not raise for failures inside the pipeline.
    """
    try:
        log_message(f"\n{'='*70}")
        log_message(f"QUERY: {question}")

        retrieved = query_engine.retrieve(question)
        total_retrieved = len(retrieved)
        log_message(f"RETRIEVED: {total_retrieved} nodes (before deduplication)")

        unique_retrieved = deduplicate_nodes(retrieved)
        duplicates_removed = total_retrieved - len(unique_retrieved)
        log_message(f"DEDUPLICATION: {duplicates_removed} duplicates removed")
        log_message(f"UNIQUE NODES: {len(unique_retrieved)} nodes")

        reranked = rerank_nodes(question, unique_retrieved, reranker, top_k=20, min_score=-0.5)
        log_message(f"RERANKED: {len(reranked)} nodes (after scoring)")

        doc_groups = _group_nodes_by_document(reranked)
        log_message(f"Documents found: {list(doc_groups.keys())}")

        context_parts = [
            _render_document_context(doc_id, groups)
            for doc_id, groups in doc_groups.items()
        ]

        # The separator carries its own leading blank line; previously the
        # row of '=' was glued directly onto the last line of the
        # preceding document.
        separator = "\n\n" + "=" * 70 + "\n\n"
        context = "\n\n" + separator.join(context_parts)
        log_message(f"Context length: {len(context)} chars")

        from config import CUSTOM_PROMPT
        prompt = CUSTOM_PROMPT.format(context_str=context, query_str=question)

        from llama_index.core import Settings
        response = Settings.llm.complete(prompt)

        sources_text = format_sources(reranked)
        chunks_info = create_chunks_info_for_display(reranked)

        answer_html = format_answer_html(response.text, model_name)
        sources_html = format_sources_html(sources_text)
        chunks_html = format_chunks_html(chunks_info)

        return answer_html, sources_html, chunks_html

    except Exception as e:
        log_message(f"Error: {e}")
        import traceback
        log_message(traceback.format_exc())

        # Exception text may contain '<', '>' or '&'; escape it so it
        # cannot break the surrounding markup.
        from html import escape
        error_html = f"<div style='background-color: #2d3748; color: #ef4444; padding: 20px; border-radius: 10px;'>Ошибка: {escape(str(e))}</div>"
        sources_html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Источники недоступны из-за ошибки</div>"
        chunks_html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Чанки недоступны из-за ошибки</div>"

        return error_html, sources_html, chunks_html
248
-
249
def rerank_nodes(query, nodes, reranker, top_k=20, min_score=-0.5):
    """Order ``nodes`` by reranker score and keep the best ``top_k``.

    Each node is scored by ``reranker.predict`` on the pair
    ``[query, first 500 characters of node.text]``. Nodes scoring below
    ``min_score`` are dropped; if that would drop every node, the
    ``top_k`` best-scoring nodes are returned regardless of the threshold.

    Args:
        query: the search query.
        nodes: sequence of objects exposing ``text``; may be empty or None.
        reranker: object exposing ``predict(pairs)``; may be None.
        top_k: maximum number of nodes to return.
        min_score: minimum score a node needs to be kept.

    Returns:
        A list of nodes, best first. Without a reranker the first
        ``top_k`` input nodes are returned unscored; with no nodes the
        result is an empty list.
    """
    if not nodes or not reranker:
        log_message("WARNING: No nodes or reranker available")
        # nodes may be None here, which cannot be sliced.
        return nodes[:top_k] if nodes else []

    pairs = [[query, node.text[:500]] for node in nodes]
    scores = reranker.predict(pairs)
    scored = sorted(zip(nodes, scores), key=lambda item: item[1], reverse=True)

    # nodes is non-empty at this point, so scored always has entries.
    top_5_scores = [score for _, score in scored[:5]]
    bottom_5_scores = [score for _, score in scored[-5:]]
    log_message(f"Score range: {min(scores):.3f} to {max(scores):.3f}")
    log_message(f"Top 5 scores: {top_5_scores}")
    log_message(f"Bottom 5 scores: {bottom_5_scores}")

    filtered = [node for node, score in scored if score >= min_score]
    log_message(f"Nodes above threshold ({min_score}): {len(filtered)}/{len(scored)}")

    # Fall back to the best-scoring nodes when nothing clears the threshold.
    result = filtered[:top_k] if filtered else [node for node, _ in scored[:top_k]]

    log_message(f"Returning {len(result)} nodes after reranking")
    return result
 
1
+ import logging
2
+ import sys
3
  from llama_index.llms.google_genai import GoogleGenAI
4
+ from llama_index.llms.openai import OpenAI
5
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
6
  from sentence_transformers import CrossEncoder
7
+ from config import AVAILABLE_MODELS, DEFAULT_MODEL, GOOGLE_API_KEY
8
+ import time
9
+ from index_retriever import rerank_nodes
10
  from my_logging import log_message
11
+ from config import PROMPT_SIMPLE_POISK
12
 
13
def get_llm_model(model_name):
    """Create the LLM client configured for ``model_name``.

    The configuration is looked up in ``AVAILABLE_MODELS``; an unknown
    name falls back to the ``DEFAULT_MODEL`` entry. The "google" provider
    yields a ``GoogleGenAI`` client and "openai" an ``OpenAI`` client.

    Any failure (missing API key, unsupported provider, client
    construction error) is logged and answered with a ``GoogleGenAI``
    client for "gemini-2.0-flash" using ``GOOGLE_API_KEY``.
    """
    try:
        config = AVAILABLE_MODELS.get(model_name)
        if not config:
            log_message(f"Модель {model_name} не найдена, использую модель по умолчанию")
            config = AVAILABLE_MODELS[DEFAULT_MODEL]

        if not config.get("api_key"):
            raise Exception(f"API ключ не найден для модели {model_name}")

        provider = config["provider"]
        if provider == "google":
            client_cls = GoogleGenAI
        elif provider == "openai":
            client_cls = OpenAI
        else:
            raise Exception(f"Неподдерживаемый провайдер: {provider}")

        return client_cls(model=config["model_name"], api_key=config["api_key"])

    except Exception as e:
        log_message(f"Ошибка создания модели {model_name}: {str(e)}")
        return GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
39
 
40
def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
    """Return a ``HuggingFaceEmbedding`` constructed for ``model_name``."""
    embedding_model = HuggingFaceEmbedding(model_name=model_name)
    return embedding_model
 
43
def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
    """Return a ``CrossEncoder`` constructed for ``model_name``."""
    reranker = CrossEncoder(model_name)
    return reranker
45
 
46
def _describe_section(metadata):
    """Return the section citation for a chunk, or '' when it has no section."""
    # section_path takes precedence over section_id.
    section_ref = metadata.get('section_path') or metadata.get('section_id')
    if not section_ref:
        return ""

    section_text = metadata.get('section_text', '')
    parent_section = metadata.get('parent_section', '')
    parent_title = metadata.get('parent_title', '')
    level = metadata.get('level', '')

    titled_ref = f"{section_ref} ({section_text})" if section_text else f"{section_ref}"

    is_subsection = level in ('subsection', 'sub_subsection', 'sub_sub_subsection')
    if is_subsection and parent_section and parent_title:
        # Subsections are cited under their parent section.
        return f"раздел {parent_section} ({parent_title}), пункт {titled_ref}"
    return f"раздел {titled_ref}"


def _describe_numbered_item(label, number, title, metadata):
    """Return the citation for a table or image, with its section when known."""
    if not str(number).startswith('№'):
        number = f"№{number}"

    section_ref = metadata.get('section_path') or metadata.get('section_id')
    base_section = f", раздел {section_ref}" if section_ref else ""

    if title:
        return f"{label} {number} ({title}){base_section}"
    return f"{label} {number}{base_section}"


def format_context_for_llm(nodes):
    """Format retrieved nodes as source-tagged context for the LLM prompt.

    Every node becomes ``"[ИСТОЧНИК: <citation>, документ <id>]\\n<text>\\n"``.
    The citation is the node's section; for nodes of type ``table`` or
    ``image`` that carry a number, it is the numbered table or image
    instead. Nodes with neither are tagged with the document id only.

    Args:
        nodes: iterable of nodes. ``metadata`` and ``text`` attributes are
            used when present; a node without ``text`` is rendered with
            ``str(node)``.

    Returns:
        The formatted entries joined with ``"\\n"`` (empty string for no
        nodes).
    """
    context_parts = []

    for node in nodes:
        # Treat a missing or None metadata attribute as "no metadata".
        metadata = getattr(node, 'metadata', None) or {}
        doc_id = metadata.get('document_id', 'Неизвестный документ')

        section_info = _describe_section(metadata)

        # A numbered table or image replaces the plain section citation.
        node_type = metadata.get('type')
        if node_type == 'table' and metadata.get('table_number'):
            section_info = _describe_numbered_item(
                'Таблица',
                metadata['table_number'],
                metadata.get('table_title', ''),
                metadata,
            )
        elif node_type == 'image' and metadata.get('image_number'):
            section_info = _describe_numbered_item(
                'Рисунок',
                metadata['image_number'],
                metadata.get('image_title', ''),
                metadata,
            )

        context_text = node.text if hasattr(node, 'text') else str(node)

        if section_info:
            formatted_context = f"[ИСТОЧНИК: {section_info}, документ {doc_id}]\n{context_text}\n"
        else:
            formatted_context = f"[ИСТОЧНИК: документ {doc_id}]\n{context_text}\n"

        context_parts.append(formatted_context)

    return "\n".join(context_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
def generate_sources_html(nodes, chunks_df=None):
    """Render the distinct sources behind ``nodes`` as an HTML panel.

    Nodes are grouped so each source appears once: tables by document id
    and table number, images by document id and image number, everything
    else by document id and section (``section_path``, falling back to
    ``section_id``). The first node of a group supplies the metadata that
    is displayed.

    Args:
        nodes: iterable of nodes; ``metadata`` is used when present.
        chunks_df: optional DataFrame. When it has a ``file_link`` column,
            text sources get a link taken from the first row whose
            ``document_id`` matches.

    Returns:
        An HTML string: a scrollable panel headed "Источники:" with one
        card per distinct source. Metadata values and the link are
        HTML-escaped.
    """
    # Imported here under a short name so the file's import block does not
    # have to change.
    from html import escape

    def esc(value):
        # Metadata comes from parsed documents and may contain '<' or '&'.
        return escape(str(value), quote=True)

    def numbered(value):
        # Same "№" convention for tables and images.
        text = str(value)
        return text if text.startswith('№') else f"№{text}"

    unique_sources = {}
    for node in nodes:
        metadata = getattr(node, 'metadata', None) or {}
        doc_type = metadata.get('type', 'text')
        doc_id = metadata.get('document_id', 'unknown')

        if doc_type == 'table':
            key = f"{doc_id}_table_{metadata.get('table_number', 'unknown')}"
        elif doc_type == 'image':
            key = f"{doc_id}_image_{metadata.get('image_number', 'unknown')}"
        else:
            section_key = metadata.get('section_path', '') or metadata.get('section_id', '')
            key = f"{doc_id}_text_{section_key}"

        # The first node seen for a key wins.
        unique_sources.setdefault(key, (doc_id, doc_type, metadata))

    has_links = chunks_df is not None and 'file_link' in chunks_df.columns

    parts = [
        "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>",
        "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>",
    ]

    for doc_id, doc_type, metadata in unique_sources.values():
        parts.append("<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; border-radius: 8px; background-color: #1a202c;'>")

        if doc_type == 'text':
            parts.append(f"<h4 style='margin: 0 0 10px 0; color: #63b3ed;'>📄 {esc(doc_id)}</h4>")

        elif doc_type in ('table', 'table_row'):
            table_num = metadata.get('table_number', 'unknown')
            table_title = metadata.get('table_title', '')
            if table_num and table_num != 'unknown':
                parts.append(f"<h4 style='margin: 0 0 10px 0; color: #68d391;'>📊 Таблица {esc(numbered(table_num))} - {esc(doc_id)}</h4>")
                if table_title and table_title != 'unknown':
                    parts.append(f"<p style='margin: 5px 0; color: #a0aec0; font-size: 14px;'>{esc(table_title)}</p>")
            else:
                parts.append(f"<h4 style='margin: 0 0 10px 0; color: #68d391;'>📊 Таблица - {esc(doc_id)}</h4>")

        elif doc_type == 'image':
            image_num = metadata.get('image_number', 'unknown')
            image_title = metadata.get('image_title', '')
            section = metadata.get('section', '')
            if image_num and image_num != 'unknown':
                parts.append(f"<h4 style='margin: 0 0 10px 0; color: #fbb6ce;'>🖼️ Изображение {esc(numbered(image_num))} - {esc(doc_id)}</h4>")
                if image_title and image_title != 'unknown':
                    parts.append(f"<p style='margin: 5px 0; color: #a0aec0; font-size: 14px;'>{esc(image_title)}</p>")
                if section and section != 'unknown':
                    parts.append(f"<p style='margin: 5px 0; color: #a0aec0; font-size: 12px;'>Раздел: {esc(section)}</p>")
            else:
                parts.append(f"<h4 style='margin: 0 0 10px 0; color: #fbb6ce;'>🖼️ Изображение - {esc(doc_id)}</h4>")

        # Only text sources get a document link.
        if has_links and doc_type == 'text':
            doc_rows = chunks_df[chunks_df['document_id'] == doc_id]
            if not doc_rows.empty:
                file_link = doc_rows.iloc[0]['file_link']
                parts.append(f"<a href='{esc(file_link)}' target='_blank' style='color: #68d391; text-decoration: none; font-size: 14px; display: inline-block; margin-top: 10px;'>🔗 Ссылка на документ</a><br>")

        parts.append("</div>")

    parts.append("</div>")
    return "".join(parts)
228
 
229
+
230
def _collect_chunk_info(nodes):
    """Summarise nodes as plain dicts for the chunk display."""
    chunk_info = []
    for node in nodes:
        metadata = node.metadata if hasattr(node, 'metadata') else {}
        # Same fallback as the context formatter: a node without ``text``
        # is represented by its string form.
        text = node.text if hasattr(node, 'text') else str(node)
        chunk_info.append({
            'document_id': metadata.get('document_id', 'unknown'),
            'section_id': metadata.get('section_id', metadata.get('section', 'unknown')),
            'section_path': metadata.get('section_path', ''),
            'section_text': metadata.get('section_text', ''),
            'level': metadata.get('level', ''),
            'parent_section': metadata.get('parent_section', ''),
            'parent_title': metadata.get('parent_title', ''),
            'type': metadata.get('type', 'text'),
            'table_number': metadata.get('table_number', ''),
            'image_number': metadata.get('image_number', ''),
            'chunk_size': len(text),
            'chunk_text': text,
        })
    return chunk_info


def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
    """Answer ``question`` and return three HTML fragments.

    Steps: retrieve nodes via ``query_engine.retriever``, rerank them with
    ``rerank_nodes``, format the survivors as source-tagged context, and
    send context + question + instruction to ``query_engine.query``.

    Args:
        question: the user's question.
        query_engine: engine exposing ``retriever.retrieve`` and ``query``;
            ``None`` means the system is not initialised.
        reranker: passed through to ``rerank_nodes``.
        current_model: model name shown in the answer and passed to
            ``get_llm_model``.
        chunks_df: optional DataFrame forwarded to ``generate_sources_html``.

    Returns:
        ``(answer_html, sources_html, chunks_html)``. When the engine is
        missing or an exception occurs, the first item is an error panel
        and the other two are empty strings.
    """
    if query_engine is None:
        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""

    try:
        start_time = time.time()

        # NOTE(review): the client created here is not used — the answer
        # below comes from query_engine.query. The call is kept so its
        # logging and fallback behaviour stay as before; confirm whether
        # generation was meant to go through this client instead.
        get_llm_model(current_model)

        # Direct retrieval without query expansion.
        retrieved_nodes = query_engine.retriever.retrieve(question)
        log_message(f"Получено {len(retrieved_nodes)} узлов")

        reranked_nodes = rerank_nodes(
            question,
            retrieved_nodes,
            reranker,
            top_k=20,
            min_score_threshold=0.5,
            diversity_penalty=0.3,
        )

        formatted_context = format_context_for_llm(reranked_nodes)

        enhanced_question = (
            "Контекст из базы данных:\n"
            f"{formatted_context}\n"
            "\n"
            f"Вопрос пользователя: {question}\n"
            "\n"
            "Инструкция: Ответь на вопрос, используя ТОЛЬКО информацию из контекста выше.\n"
            "Если информации недостаточно, четко укажи это. Цитируй конкретные источники."
        )

        # NOTE(review): query_engine.query presumably runs its own
        # retrieval on this combined prompt — confirm that is intended.
        response = query_engine.query(enhanced_question)

        processing_time = time.time() - start_time
        log_message(f"Обработка завершена за {processing_time:.2f}с")

        sources_html = generate_sources_html(reranked_nodes, chunks_df)

        answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
            <h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
            <div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
            <div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
                Время обработки: {processing_time:.2f} секунд
            </div>
        </div>"""

        chunk_info = _collect_chunk_info(reranked_nodes)

        # Imported at call time rather than at module level — presumably
        # to avoid a circular import with app; TODO confirm.
        from app import create_chunks_display_html
        chunks_html = create_chunks_display_html(chunk_info)

        return answer_with_time, sources_html, chunks_html

    except Exception as e:
        log_message(f"Ошибка: {str(e)}")
        import traceback
        log_message(traceback.format_exc())

        # Exception text may contain '<', '>' or '&'; escape it so it
        # cannot break the surrounding markup.
        from html import escape
        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка: {escape(str(e))}</div>"
        return error_msg, "", ""