Spaces:
Sleeping
Sleeping
Commit 5e35433 · 1 Parent(s): 73dd9ce
only semantic search top k = 30, cut off = 0.7
Browse files
- index_retriever.py +9 -11
- utils.py +6 -26
index_retriever.py
CHANGED
|
@@ -12,28 +12,26 @@ def create_vector_index(documents):
|
|
| 12 |
|
| 13 |
def create_query_engine(vector_index):
|
| 14 |
try:
|
| 15 |
-
# --- Semantic-only retriever ---
|
| 16 |
vector_retriever = VectorIndexRetriever(
|
| 17 |
-
index=vector_index,
|
| 18 |
-
similarity_top_k=30,
|
| 19 |
-
similarity_cutoff=0.
|
| 20 |
)
|
| 21 |
-
|
| 22 |
custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
|
| 23 |
-
|
| 24 |
response_synthesizer = get_response_synthesizer(
|
| 25 |
response_mode=ResponseMode.TREE_SUMMARIZE,
|
| 26 |
text_qa_template=custom_prompt_template
|
| 27 |
)
|
| 28 |
-
|
| 29 |
query_engine = RetrieverQueryEngine(
|
| 30 |
retriever=vector_retriever,
|
| 31 |
response_synthesizer=response_synthesizer
|
| 32 |
)
|
| 33 |
-
|
| 34 |
-
log_message("
|
| 35 |
return query_engine
|
| 36 |
-
|
| 37 |
except Exception as e:
|
| 38 |
log_message(f"Ошибка создания query engine: {str(e)}")
|
| 39 |
-
raise
|
|
|
|
| 12 |
|
| 13 |
def create_query_engine(vector_index):
|
| 14 |
try:
|
|
|
|
| 15 |
vector_retriever = VectorIndexRetriever(
|
| 16 |
+
index=vector_index,
|
| 17 |
+
similarity_top_k=30,
|
| 18 |
+
similarity_cutoff=0.7
|
| 19 |
)
|
| 20 |
+
|
| 21 |
custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
|
|
|
|
| 22 |
response_synthesizer = get_response_synthesizer(
|
| 23 |
response_mode=ResponseMode.TREE_SUMMARIZE,
|
| 24 |
text_qa_template=custom_prompt_template
|
| 25 |
)
|
| 26 |
+
|
| 27 |
query_engine = RetrieverQueryEngine(
|
| 28 |
retriever=vector_retriever,
|
| 29 |
response_synthesizer=response_synthesizer
|
| 30 |
)
|
| 31 |
+
|
| 32 |
+
log_message("Query engine успешно создан (только векторный поиск)")
|
| 33 |
return query_engine
|
| 34 |
+
|
| 35 |
except Exception as e:
|
| 36 |
log_message(f"Ошибка создания query engine: {str(e)}")
|
| 37 |
+
raise
|
utils.py
CHANGED
|
@@ -6,7 +6,7 @@ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
| 6 |
from sentence_transformers import CrossEncoder
|
| 7 |
from config import AVAILABLE_MODELS, DEFAULT_MODEL, GOOGLE_API_KEY
|
| 8 |
import time
|
| 9 |
-
from index_retriever import rerank_nodes
|
| 10 |
from my_logging import log_message
|
| 11 |
from config import PROMPT_SIMPLE_POISK
|
| 12 |
|
|
@@ -260,31 +260,11 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 260 |
|
| 261 |
llm = get_llm_model(current_model)
|
| 262 |
|
| 263 |
-
|
| 264 |
|
| 265 |
-
|
| 266 |
-
seen_node_ids = set()
|
| 267 |
|
| 268 |
-
|
| 269 |
-
retrieved = query_engine.retriever.retrieve(query_var)
|
| 270 |
-
for node in retrieved:
|
| 271 |
-
node_id = f"{node.node_id if hasattr(node, 'node_id') else hash(node.text)}"
|
| 272 |
-
if node_id not in seen_node_ids:
|
| 273 |
-
all_nodes.append(node)
|
| 274 |
-
seen_node_ids.add(node_id)
|
| 275 |
-
|
| 276 |
-
log_message(f"Получено {len(all_nodes)} уникальных узлов из {len(query_variations)} запросов")
|
| 277 |
-
|
| 278 |
-
reranked_nodes = rerank_nodes(
|
| 279 |
-
question,
|
| 280 |
-
all_nodes,
|
| 281 |
-
reranker,
|
| 282 |
-
top_k=20,
|
| 283 |
-
min_score_threshold=0.5,
|
| 284 |
-
diversity_penalty=0.3
|
| 285 |
-
)
|
| 286 |
-
|
| 287 |
-
formatted_context = format_context_for_llm(reranked_nodes)
|
| 288 |
|
| 289 |
enhanced_question = f"""Контекст из базы данных:
|
| 290 |
{formatted_context}
|
|
@@ -301,7 +281,7 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 301 |
|
| 302 |
log_message(f"Обработка завершена за {processing_time:.2f}с")
|
| 303 |
|
| 304 |
-
sources_html = generate_sources_html(reranked_nodes, chunks_df)
|
| 305 |
|
| 306 |
answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
|
| 307 |
<h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
|
|
@@ -312,7 +292,7 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 312 |
</div>"""
|
| 313 |
|
| 314 |
chunk_info = []
|
| 315 |
-
for node in reranked_nodes:
|
| 316 |
metadata = node.metadata if hasattr(node, 'metadata') else {}
|
| 317 |
chunk_info.append({
|
| 318 |
'document_id': metadata.get('document_id', 'unknown'),
|
|
|
|
| 6 |
from sentence_transformers import CrossEncoder
|
| 7 |
from config import AVAILABLE_MODELS, DEFAULT_MODEL, GOOGLE_API_KEY
|
| 8 |
import time
|
| 9 |
+
# from index_retriever import rerank_nodes
|
| 10 |
from my_logging import log_message
|
| 11 |
from config import PROMPT_SIMPLE_POISK
|
| 12 |
|
|
|
|
| 260 |
|
| 261 |
llm = get_llm_model(current_model)
|
| 262 |
|
| 263 |
+
retrieved_nodes = query_engine.retriever.retrieve(question)
|
| 264 |
|
| 265 |
+
log_message(f"Получено {len(retrieved_nodes)} узлов")
|
|
|
|
| 266 |
|
| 267 |
+
formatted_context = format_context_for_llm(retrieved_nodes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
enhanced_question = f"""Контекст из базы данных:
|
| 270 |
{formatted_context}
|
|
|
|
| 281 |
|
| 282 |
log_message(f"Обработка завершена за {processing_time:.2f}с")
|
| 283 |
|
| 284 |
+
sources_html = generate_sources_html(retrieved_nodes, chunks_df)
|
| 285 |
|
| 286 |
answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
|
| 287 |
<h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
|
|
|
|
| 292 |
</div>"""
|
| 293 |
|
| 294 |
chunk_info = []
|
| 295 |
+
for node in retrieved_nodes :
|
| 296 |
metadata = node.metadata if hasattr(node, 'metadata') else {}
|
| 297 |
chunk_info.append({
|
| 298 |
'document_id': metadata.get('document_id', 'unknown'),
|