MrSimple01 committed on
Commit
f415486
·
verified ·
1 Parent(s): a2e9ee2

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +289 -273
utils.py CHANGED
@@ -1,274 +1,290 @@
1
- import logging
2
- import sys
3
- from llama_index.llms.google_genai import GoogleGenAI
4
- from llama_index.llms.openai import OpenAI
5
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
6
- from sentence_transformers import CrossEncoder
7
- from config import AVAILABLE_MODELS, DEFAULT_MODEL, GOOGLE_API_KEY
8
- import time
9
- from index_retriever import rerank_nodes
10
- from my_logging import log_message
11
- from config import PROMPT_SIMPLE_POISK
12
-
13
- def get_llm_model(model_name):
14
- try:
15
- model_config = AVAILABLE_MODELS.get(model_name)
16
- if not model_config:
17
- log_message(f"Модель {model_name} не найдена, использую модель по умолчанию")
18
- model_config = AVAILABLE_MODELS[DEFAULT_MODEL]
19
-
20
- if not model_config.get("api_key"):
21
- raise Exception(f"API ключ не найден для модели {model_name}")
22
-
23
- if model_config["provider"] == "google":
24
- return GoogleGenAI(
25
- model=model_config["model_name"],
26
- api_key=model_config["api_key"]
27
- )
28
- elif model_config["provider"] == "openai":
29
- return OpenAI(
30
- model=model_config["model_name"],
31
- api_key=model_config["api_key"]
32
- )
33
- else:
34
- raise Exception(f"Неподдерживаемый провайдер: {model_config['provider']}")
35
-
36
- except Exception as e:
37
- log_message(f"Ошибка создания модели {model_name}: {str(e)}")
38
- return GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
39
-
40
- def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
41
- return HuggingFaceEmbedding(model_name=model_name)
42
-
43
- def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
44
- return CrossEncoder(model_name)
45
-
46
- def generate_sources_html(nodes, chunks_df=None):
47
- html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
48
- html += "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>"
49
-
50
- sources_by_doc = {}
51
-
52
- for i, node in enumerate(nodes):
53
- metadata = node.metadata if hasattr(node, 'metadata') else {}
54
- doc_type = metadata.get('type', 'text')
55
- doc_id = metadata.get('document_id', 'unknown')
56
-
57
- if doc_type == 'table' or doc_type == 'table_row':
58
- table_num = metadata.get('table_number', 'unknown')
59
- key = f"{doc_id}_table_{table_num}"
60
- elif doc_type == 'image':
61
- image_num = metadata.get('image_number', 'unknown')
62
- key = f"{doc_id}_image_{image_num}"
63
- else:
64
- section_path = metadata.get('section_path', '')
65
- section_id = metadata.get('section_id', '')
66
- section_key = section_path if section_path else section_id
67
- key = f"{doc_id}_text_{section_key}"
68
-
69
- if key not in sources_by_doc:
70
- sources_by_doc[key] = {
71
- 'doc_id': doc_id,
72
- 'doc_type': doc_type,
73
- 'metadata': metadata,
74
- 'sections': set()
75
- }
76
-
77
- if doc_type not in ['table', 'table_row', 'image']:
78
- section_path = metadata.get('section_path', '')
79
- section_id = metadata.get('section_id', '')
80
- if section_path:
81
- sources_by_doc[key]['sections'].add(f"пункт {section_path}")
82
- elif section_id and section_id != 'unknown':
83
- sources_by_doc[key]['sections'].add(f"пункт {section_id}")
84
-
85
- for source_info in sources_by_doc.values():
86
- metadata = source_info['metadata']
87
- doc_type = source_info['doc_type']
88
- doc_id = source_info['doc_id']
89
-
90
- html += f"<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; border-radius: 8px; background-color: #1a202c;'>"
91
-
92
- if doc_type == 'text':
93
- html += f"<h4 style='margin: 0 0 10px 0; color: #63b3ed;'>📄 {doc_id}</h4>"
94
- elif doc_type == 'table' or doc_type == 'table_row':
95
- table_num = metadata.get('table_number', 'unknown')
96
- table_title = metadata.get('table_title', '')
97
- if table_num and table_num != 'unknown':
98
- if not str(table_num).startswith('№'):
99
- table_num = f"№{table_num}"
100
- html += f"<h4 style='margin: 0 0 10px 0; color: #68d391;'>📊 Таблица {table_num} - {doc_id}</h4>"
101
- if table_title and table_title != 'unknown':
102
- html += f"<p style='margin: 5px 0; color: #a0aec0; font-size: 14px;'>{table_title}</p>"
103
- else:
104
- html += f"<h4 style='margin: 0 0 10px 0; color: #68d391;'>📊 Таблица - {doc_id}</h4>"
105
- elif doc_type == 'image':
106
- image_num = metadata.get('image_number', 'unknown')
107
- image_title = metadata.get('image_title', '')
108
- if image_num and image_num != 'unknown':
109
- if not str(image_num).startswith('№'):
110
- image_num = f"№{image_num}"
111
- html += f"<h4 style='margin: 0 0 10px 0; color: #fbb6ce;'>🖼️ Изображение {image_num} - {doc_id}</h4>"
112
- if image_title and image_title != 'unknown':
113
- html += f"<p style='margin: 5px 0; color: #a0aec0; font-size: 14px;'>{image_title}</p>"
114
-
115
- if chunks_df is not None and 'file_link' in chunks_df.columns and doc_type == 'text':
116
- doc_rows = chunks_df[chunks_df['document_id'] == doc_id]
117
- if not doc_rows.empty:
118
- file_link = doc_rows.iloc[0]['file_link']
119
- html += f"<a href='{file_link}' target='_blank' style='color: #68d391; text-decoration: none; font-size: 14px; display: inline-block; margin-top: 10px;'>🔗 Ссылка на документ</a><br>"
120
-
121
- html += "</div>"
122
-
123
- html += "</div>"
124
- return html
125
-
126
- def deduplicate_nodes(nodes):
127
- """Deduplicate retrieved nodes based on content and metadata"""
128
- seen = set()
129
- unique_nodes = []
130
-
131
- for node in nodes:
132
- doc_id = node.metadata.get('document_id', '')
133
- node_type = node.metadata.get('type', 'text')
134
-
135
- if node_type == 'table' or node_type == 'table_row':
136
- table_num = node.metadata.get('table_number', '')
137
- table_identifier = node.metadata.get('table_identifier', table_num)
138
-
139
- # Use row range to distinguish table chunks
140
- row_start = node.metadata.get('row_start', '')
141
- row_end = node.metadata.get('row_end', '')
142
- is_complete = node.metadata.get('is_complete_table', False)
143
-
144
- if is_complete:
145
- identifier = f"{doc_id}|table|{table_identifier}|complete"
146
- elif row_start != '' and row_end != '':
147
- identifier = f"{doc_id}|table|{table_identifier}|rows_{row_start}_{row_end}"
148
- else:
149
- # Fallback: use chunk_id if available
150
- chunk_id = node.metadata.get('chunk_id', '')
151
- if chunk_id != '':
152
- identifier = f"{doc_id}|table|{table_identifier}|chunk_{chunk_id}"
153
- else:
154
- # Last resort: hash first 100 chars of content
155
- import hashlib
156
- content_hash = hashlib.md5(node.text[:100].encode()).hexdigest()[:8]
157
- identifier = f"{doc_id}|table|{table_identifier}|{content_hash}"
158
-
159
- elif node_type == 'image':
160
- img_num = node.metadata.get('image_number', '')
161
- identifier = f"{doc_id}|image|{img_num}"
162
-
163
- else: # text
164
- section_id = node.metadata.get('section_id', '')
165
- chunk_id = node.metadata.get('chunk_id', 0)
166
- # For text, section_id + chunk_id should be unique
167
- identifier = f"{doc_id}|text|{section_id}|{chunk_id}"
168
-
169
- if identifier not in seen:
170
- seen.add(identifier)
171
- unique_nodes.append(node)
172
-
173
- return unique_nodes
174
-
175
- def debug_search_tables(vector_index, search_term="С-25"):
176
- """Debug function to find all tables containing a specific term"""
177
- all_nodes = list(vector_index.docstore.docs.values())
178
-
179
- matching = []
180
- for node in all_nodes:
181
- if node.metadata.get('type') == 'table':
182
- text = node.get_content()
183
- if search_term in text or search_term in node.metadata.get('table_title', ''):
184
- matching.append({
185
- 'doc_id': node.metadata.get('document_id'),
186
- 'table_num': node.metadata.get('table_number'),
187
- 'title': node.metadata.get('table_title', '')[:100]
188
- })
189
-
190
- log_message(f"\n{'='*60}")
191
- log_message(f"DEBUG: Found {len(matching)} tables containing '{search_term}'")
192
- for m in matching:
193
- log_message(f" • {m['doc_id']} - Table {m['table_num']}: {m['title']}")
194
- log_message(f"{'='*60}\n")
195
-
196
- return matching
197
-
198
- # Add this import at the top of utils.py
199
- from documents_prep import normalize_text
200
-
201
- # MODIFIED: Update answer_question function
202
- def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
203
- # NORMALIZE the question to convert C to С
204
- normalized_question = normalize_text(question)
205
-
206
- if query_engine is None:
207
- return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
208
-
209
- try:
210
- start_time = time.time()
211
- # Use NORMALIZED question for retrieval
212
- retrieved_nodes = query_engine.retriever.retrieve(normalized_question)
213
- log_message(f"user query: {question}")
214
- log_message(f"normalized query: {normalized_question}")
215
-
216
-
217
- log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")
218
-
219
- unique_retrieved = deduplicate_nodes(retrieved_nodes)
220
-
221
- # DEBUG: Log what was retrieved
222
- log_message(f"RETRIEVED: unique {len(unique_retrieved)} nodes")
223
- for i, node in enumerate(unique_retrieved): # All debug
224
- table_num = node.metadata.get('table_number', 'N/A')
225
- table_title = node.metadata.get('table_title', 'N/A')
226
- doc_id = node.metadata.get('document_id', 'N/A')
227
- log_message(f" [{i+1}] {doc_id} - Table {table_num}: {table_title[:50]}")
228
- log_message(f"UNIQUE NODES: {len(unique_retrieved)} nodes")
229
-
230
- # Simple reranking with NORMALIZED question
231
- reranked_nodes = rerank_nodes(normalized_question, unique_retrieved, reranker, top_k=20)
232
-
233
- # Direct query without formatting - use normalized question
234
- response = query_engine.query(normalized_question)
235
-
236
- end_time = time.time()
237
- processing_time = end_time - start_time
238
-
239
- log_message(f"Обработка завершена за {processing_time:.2f}с")
240
-
241
- sources_html = generate_sources_html(reranked_nodes, chunks_df)
242
-
243
- answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
244
- <h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
245
- <div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
246
- <div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
247
- Время обработки: {processing_time:.2f} секунд
248
- </div>
249
- </div>"""
250
- log_message(f"Model Answer: {response.response}")
251
-
252
- chunk_info = []
253
- for node in reranked_nodes:
254
- metadata = node.metadata if hasattr(node, 'metadata') else {}
255
- chunk_info.append({
256
- 'document_id': metadata.get('document_id', 'unknown'),
257
- 'section_id': metadata.get('section_id', 'unknown'),
258
- 'section_path': metadata.get('section_path', ''),
259
- 'section_text': metadata.get('section_text', ''),
260
- 'type': metadata.get('type', 'text'),
261
- 'table_number': metadata.get('table_number', ''),
262
- 'image_number': metadata.get('image_number', ''),
263
- 'chunk_size': len(node.text),
264
- 'chunk_text': node.text
265
- })
266
- from app import create_chunks_display_html
267
- chunks_html = create_chunks_display_html(chunk_info)
268
-
269
- return answer_with_time, sources_html, chunks_html
270
-
271
- except Exception as e:
272
- log_message(f"Ошибка: {str(e)}")
273
- error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка: {str(e)}</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  return error_msg, "", ""
 
1
+ import logging
2
+ import sys
3
+ from llama_index.llms.google_genai import GoogleGenAI
4
+ from llama_index.llms.openai import OpenAI
5
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
6
+ from sentence_transformers import CrossEncoder
7
+ from config import AVAILABLE_MODELS, DEFAULT_MODEL, GOOGLE_API_KEY
8
+ import time
9
+ from index_retriever import rerank_nodes
10
+ from my_logging import log_message
11
+ from config import PROMPT_SIMPLE_POISK
12
+
13
def get_llm_model(model_name):
    """Build the LLM client configured under *model_name*.

    The configuration is looked up in ``AVAILABLE_MODELS``; an unknown name
    falls back to the ``DEFAULT_MODEL`` entry. Only the ``google`` and
    ``openai`` providers are handled. Any failure (missing API key,
    unsupported provider, lookup or constructor error) is logged and a
    ``gemini-2.0-flash`` client created with ``GOOGLE_API_KEY`` is returned
    instead, so this function does not raise for configuration problems.
    """
    try:
        settings = AVAILABLE_MODELS.get(model_name)
        if not settings:
            log_message(f"Модель {model_name} не найдена, использую модель по умолчанию")
            settings = AVAILABLE_MODELS[DEFAULT_MODEL]

        # Refuse to build a client without credentials; handled by the fallback below.
        if not settings.get("api_key"):
            raise Exception(f"API ключ не найден для модели {model_name}")

        provider = settings["provider"]
        if provider == "google":
            llm_cls = GoogleGenAI
        elif provider == "openai":
            llm_cls = OpenAI
        else:
            raise Exception(f"Неподдерживаемый провайдер: {settings['provider']}")

        return llm_cls(
            model=settings["model_name"],
            api_key=settings["api_key"],
        )
    except Exception as e:
        # Best-effort: never leave the caller without a model.
        log_message(f"Ошибка создания модели {model_name}: {str(e)}")
        return GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
39
+
40
def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
    """Return a ``HuggingFaceEmbedding`` constructed for *model_name*."""
    embedder = HuggingFaceEmbedding(model_name=model_name)
    return embedder
42
+
43
def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
    """Return a ``CrossEncoder`` reranker constructed for *model_name*."""
    reranker = CrossEncoder(model_name)
    return reranker
45
+
46
def _source_group_key(doc_id, doc_type, metadata):
    """Return the key under which a node is merged into a single source card."""
    if doc_type in ('table', 'table_row'):
        return f"{doc_id}_table_{metadata.get('table_number', 'unknown')}"
    if doc_type == 'image':
        return f"{doc_id}_image_{metadata.get('image_number', 'unknown')}"
    # Everything else is treated as text and grouped per section.
    section_key = metadata.get('section_path', '') or metadata.get('section_id', '')
    return f"{doc_id}_text_{section_key}"


def _source_heading_html(doc_type, doc_id, metadata, sections):
    """Return the HTML fragments (heading plus optional subtitle) of one card."""
    from html import escape  # function-scope: keeps the block self-contained

    safe_doc = escape(str(doc_id))
    subtitle_style = "margin: 5px 0; color: #a0aec0; font-size: 14px;"

    if doc_type in ('table', 'table_row'):
        icon, label, color = '📊', 'Таблица', '#68d391'
        number = metadata.get('table_number', 'unknown')
        title = metadata.get('table_title', '')
    elif doc_type == 'image':
        icon, label, color = '🖼️', 'Изображение', '#fbb6ce'
        number = metadata.get('image_number', 'unknown')
        title = metadata.get('image_title', '')
    else:
        # Any non-table, non-image type is rendered as a text source so that
        # no card is ever emitted without a heading.
        fragments = [f"<h4 style='margin: 0 0 10px 0; color: #63b3ed;'>📄 {safe_doc}</h4>"]
        fragments.extend(
            f"<p style='{subtitle_style}'>{escape(section)}</p>"
            for section in sorted(sections)
        )
        return fragments

    if number and number != 'unknown':
        number = str(number)
        if not number.startswith('№'):
            number = f"№{number}"
        fragments = [
            f"<h4 style='margin: 0 0 10px 0; color: {color};'>"
            f"{icon} {label} {escape(number)} - {safe_doc}</h4>"
        ]
        if title and title != 'unknown':
            fragments.append(f"<p style='{subtitle_style}'>{escape(str(title))}</p>")
        return fragments

    # Number unknown: still show which document the table/image came from.
    return [f"<h4 style='margin: 0 0 10px 0; color: {color};'>{icon} {label} - {safe_doc}</h4>"]


def generate_sources_html(nodes, chunks_df=None):
    """Render the "sources" panel for a list of retrieved nodes.

    Nodes are grouped into cards: one per (document, table number), one per
    (document, image number) and one per (document, section) for everything
    else. Each card shows a heading; text cards also list the section they
    came from and, when *chunks_df* has a ``file_link`` column with a row for
    the document, a link to the file.

    Args:
        nodes: iterable of objects exposing a ``metadata`` mapping (objects
            without the attribute are treated as having empty metadata).
        chunks_df: optional DataFrame-like object with ``document_id`` and
            ``file_link`` columns used to resolve document links.

    Returns:
        A single HTML string. All metadata values are HTML-escaped.
    """
    from html import escape  # function-scope: keeps the block self-contained

    non_text_types = ('table', 'table_row', 'image')
    grouped = {}

    for node in nodes:
        metadata = node.metadata if hasattr(node, 'metadata') else {}
        doc_type = metadata.get('type', 'text')
        doc_id = metadata.get('document_id', 'unknown')

        entry = grouped.setdefault(
            _source_group_key(doc_id, doc_type, metadata),
            {
                'doc_id': doc_id,
                'doc_type': doc_type,
                'metadata': metadata,
                'sections': set(),
            },
        )

        if doc_type not in non_text_types:
            section_path = metadata.get('section_path', '')
            section_id = metadata.get('section_id', '')
            if section_path:
                entry['sections'].add(f"пункт {section_path}")
            elif section_id and section_id != 'unknown':
                entry['sections'].add(f"пункт {section_id}")

    can_link = chunks_df is not None and 'file_link' in chunks_df.columns

    parts = [
        "<div style='background-color: #2d3748; color: white; padding: 20px; "
        "border-radius: 10px; max-height: 400px; overflow-y: auto;'>",
        "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>",
    ]

    for source_info in grouped.values():
        doc_type = source_info['doc_type']
        doc_id = source_info['doc_id']

        parts.append(
            "<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; "
            "border-radius: 8px; background-color: #1a202c;'>"
        )
        parts.extend(
            _source_heading_html(doc_type, doc_id, source_info['metadata'], source_info['sections'])
        )

        if can_link and doc_type not in non_text_types:
            doc_rows = chunks_df[chunks_df['document_id'] == doc_id]
            if not doc_rows.empty:
                file_link = doc_rows.iloc[0]['file_link']
                # Skip missing links (e.g. NaN) instead of rendering href='nan'.
                if isinstance(file_link, str) and file_link.strip():
                    parts.append(
                        f"<a href='{escape(file_link)}' target='_blank' style='color: #68d391; "
                        "text-decoration: none; font-size: 14px; display: inline-block; "
                        "margin-top: 10px;'>🔗 Ссылка на документ</a><br>"
                    )

        parts.append("</div>")

    parts.append("</div>")
    return "".join(parts)
125
+
126
def deduplicate_nodes(nodes):
    """Drop repeated nodes, keeping the first occurrence and the input order.

    Identity is derived from metadata:

    * tables / table rows: document, table identifier (falling back to the
      table number) and, in order of preference, the "complete table" flag,
      the row range, or the chunk id;
    * images: document and image number;
    * anything else (text): document, section id and chunk id.

    When the distinguishing metadata is missing, a digest of the node's
    *full* text is used instead, so that distinct chunks are never merged
    just because their metadata is incomplete or their first characters
    (e.g. a shared table header) coincide.

    Args:
        nodes: iterable of objects exposing ``metadata`` (mapping) and ``text``.

    Returns:
        A list with the unique nodes.
    """
    import hashlib

    def content_digest(node):
        # Hash the whole text: table chunks frequently share their header.
        return hashlib.sha256(node.text.encode('utf-8')).hexdigest()[:16]

    seen = set()
    unique_nodes = []

    for node in nodes:
        metadata = node.metadata
        doc_id = metadata.get('document_id', '')
        node_type = metadata.get('type', 'text')

        if node_type in ('table', 'table_row'):
            table_identifier = metadata.get('table_identifier', metadata.get('table_number', ''))
            row_start = metadata.get('row_start', '')
            row_end = metadata.get('row_end', '')
            chunk_id = metadata.get('chunk_id', '')

            if metadata.get('is_complete_table', False):
                suffix = "complete"
            elif row_start != '' and row_end != '':
                suffix = f"rows_{row_start}_{row_end}"
            elif chunk_id != '':
                suffix = f"chunk_{chunk_id}"
            else:
                suffix = content_digest(node)
            identifier = f"{doc_id}|table|{table_identifier}|{suffix}"

        elif node_type == 'image':
            img_num = metadata.get('image_number', '')
            if img_num == '':
                # Unnumbered images must not all collapse into one entry.
                img_num = content_digest(node)
            identifier = f"{doc_id}|image|{img_num}"

        else:  # text
            section_id = metadata.get('section_id', '')
            chunk_id = metadata.get('chunk_id')
            if chunk_id is None:
                # Without a chunk id every chunk of a section would look identical.
                chunk_id = content_digest(node)
            identifier = f"{doc_id}|text|{section_id}|{chunk_id}"

        if identifier not in seen:
            seen.add(identifier)
            unique_nodes.append(node)

    return unique_nodes
174
+
175
def debug_search_tables(vector_index, search_term="С-25"):
    """Log and return every ``type == 'table'`` node that mentions *search_term*.

    All nodes in ``vector_index.docstore.docs`` are scanned; a table node
    matches when the term occurs in its content or in its ``table_title``
    metadata. Each match is reported as a dict with ``doc_id``,
    ``table_num`` and ``title`` (title cut to 100 characters).
    """
    hits = []
    for candidate in list(vector_index.docstore.docs.values()):
        meta = candidate.metadata
        if meta.get('type') != 'table':
            continue

        body = candidate.get_content()
        if search_term in body or search_term in meta.get('table_title', ''):
            hits.append({
                'doc_id': meta.get('document_id'),
                'table_num': meta.get('table_number'),
                'title': meta.get('table_title', '')[:100]
            })

    # Framed report so the debug output is easy to spot in the log.
    log_message(f"\n{'='*60}")
    log_message(f"DEBUG: Found {len(hits)} tables containing '{search_term}'")
    for hit in hits:
        log_message(f" • {hit['doc_id']} - Table {hit['table_num']}: {hit['title']}")
    log_message(f"{'='*60}\n")

    return hits
197
+
198
+ from documents_prep import normalize_text, normalize_steel_designations
199
+
200
def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
    """Answer *question* and build the three HTML panels shown to the user.

    Pipeline: normalize the question (``normalize_text`` followed by
    ``normalize_steel_designations``), retrieve nodes, deduplicate them,
    rerank them, query the engine, then render the answer, the sources and
    the retrieved chunks. The *same* fully normalized query string is used
    for retrieval, reranking and answering so that the displayed sources
    correspond to the query the answer was produced for.

    Args:
        question: raw user question.
        query_engine: object exposing ``retriever.retrieve(str)`` and
            ``query(str)``; ``None`` means the system is not initialized.
        reranker: reranker passed through to ``rerank_nodes``.
        current_model: model name shown in the answer header.
        chunks_df: optional table used by ``generate_sources_html`` for links.
        rerank_top_k: number of nodes kept after reranking.

    Returns:
        ``(answer_html, sources_html, chunks_html)``. When the engine is
        missing or any step fails, the first element is an error box and the
        other two are empty strings; exceptions are logged, not raised.
    """
    from html import escape  # function-scope: keeps the block self-contained

    # Cheap guard first: nothing below is useful without an engine.
    if query_engine is None:
        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""

    try:
        start_time = time.time()

        # Normalization is inside the try so a failure yields the error box.
        normalized_question = normalize_text(question)
        # NOTE(review): assumes normalize_steel_designations returns
        # (text, changes, change_list) and can be chained after
        # normalize_text — confirm against documents_prep.
        final_query, query_changes, change_list = normalize_steel_designations(normalized_question)

        log_message(f"user query: {question}")
        log_message(f"normalized query: {normalized_question}")
        log_message(f"after steel normalization: {final_query}")
        log_message(f"Steel grades normalized in query: {query_changes}")
        if change_list:
            log_message(f"Query changes: {', '.join(map(str, change_list))}")

        retrieved_nodes = query_engine.retriever.retrieve(final_query)
        log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")

        unique_retrieved = deduplicate_nodes(retrieved_nodes)

        # Debug listing of what was retrieved; must never break the request,
        # hence the defensive str() around metadata values.
        log_message(f"RETRIEVED: unique {len(unique_retrieved)} nodes")
        for i, node in enumerate(unique_retrieved):
            node_type = node.metadata.get('type', 'text')
            doc_id = node.metadata.get('document_id', 'N/A')

            if node_type in ('table', 'table_row'):
                table_num = node.metadata.get('table_number', 'N/A')
                table_id = node.metadata.get('table_identifier', 'N/A')
                table_title = str(node.metadata.get('table_title', 'N/A'))
                content_preview = node.text[:200].replace('\n', ' ')
                log_message(f" [{i+1}] {doc_id} - Table {table_num} | ID: {table_id}")
                log_message(f" Title: {table_title[:80]}")
                log_message(f" Content: {content_preview}...")
            else:
                section = node.metadata.get('section_id', 'N/A')
                log_message(f" [{i+1}] {doc_id} - Text section {section}")

        log_message(f"UNIQUE NODES: {len(unique_retrieved)} nodes")

        reranked_nodes = rerank_nodes(final_query, unique_retrieved, reranker, top_k=rerank_top_k)

        # NOTE(review): query() presumably performs its own retrieval, so the
        # reranked nodes above are used for display only — confirm.
        response = query_engine.query(final_query)

        processing_time = time.time() - start_time
        log_message(f"Обработка завершена за {processing_time:.2f}с")

        sources_html = generate_sources_html(reranked_nodes, chunks_df)

        answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
<h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
<div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
<div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
Время обработки: {processing_time:.2f} секунд
</div>
</div>"""
        log_message(f"Model Answer: {response.response}")

        chunk_info = []
        for node in reranked_nodes:
            metadata = node.metadata if hasattr(node, 'metadata') else {}
            chunk_info.append({
                'document_id': metadata.get('document_id', 'unknown'),
                'section_id': metadata.get('section_id', 'unknown'),
                'section_path': metadata.get('section_path', ''),
                'section_text': metadata.get('section_text', ''),
                'type': metadata.get('type', 'text'),
                'table_number': metadata.get('table_number', ''),
                'image_number': metadata.get('image_number', ''),
                'chunk_size': len(node.text),
                'chunk_text': node.text,
            })

        # Imported lazily, as in the original (presumably to avoid a circular
        # import between app and utils — confirm).
        from app import create_chunks_display_html
        chunks_html = create_chunks_display_html(chunk_info)

        return answer_with_time, sources_html, chunks_html

    except Exception as e:
        log_message(f"Ошибка: {str(e)}")
        # Escape the message: exception text may contain '<' / '&'.
        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка: {escape(str(e))}</div>"
        return error_msg, "", ""