Spaces:
Sleeping
Sleeping
Commit
·
2370c98
1
Parent(s):
3ac0ce6
top_k = 15 + added the log messages + added section id + text
Browse files
utils.py
CHANGED
|
@@ -51,16 +51,9 @@ def format_context_for_llm(nodes):
|
|
| 51 |
doc_id = metadata.get('document_id', 'Неизвестный документ')
|
| 52 |
|
| 53 |
section_info = ""
|
| 54 |
-
if metadata.get('
|
| 55 |
-
section_info = f"пункт {metadata['
|
| 56 |
-
|
| 57 |
-
section_info = f"пункт {metadata['paragraph']}"
|
| 58 |
-
elif metadata.get('clause'):
|
| 59 |
-
section_info = f"пункт {metadata['clause']}"
|
| 60 |
-
elif metadata.get('item'):
|
| 61 |
-
section_info = f"пункт {metadata['item']}"
|
| 62 |
-
elif metadata.get('page'):
|
| 63 |
-
section_info = f"страница {metadata['page']}"
|
| 64 |
|
| 65 |
if metadata.get('type') == 'table' and metadata.get('table_number'):
|
| 66 |
table_num = metadata['table_number']
|
|
@@ -93,7 +86,7 @@ def generate_sources_html(nodes, chunks_df=None):
|
|
| 93 |
metadata = node.metadata if hasattr(node, 'metadata') else {}
|
| 94 |
doc_type = metadata.get('type', 'text')
|
| 95 |
doc_id = metadata.get('document_id', 'unknown')
|
| 96 |
-
section_id = metadata.get('section_id', '')
|
| 97 |
|
| 98 |
html += f"<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; border-radius: 8px; background-color: #1a202c;'>"
|
| 99 |
|
|
@@ -142,20 +135,23 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 142 |
log_message("Извлекаю релевантные узлы")
|
| 143 |
retrieved_nodes = query_engine.retriever.retrieve(question)
|
| 144 |
log_message(f"Извлечено {len(retrieved_nodes)} узлов")
|
|
|
|
|
|
|
| 145 |
|
| 146 |
log_message("Применяю переранжировку")
|
| 147 |
-
reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=
|
| 148 |
|
| 149 |
formatted_context = format_context_for_llm(reranked_nodes)
|
|
|
|
| 150 |
|
| 151 |
enhanced_question = f"""
|
| 152 |
Контекст из базы данных:
|
| 153 |
{formatted_context}
|
| 154 |
|
| 155 |
-
Вопрос пользователя: {question}
|
| 156 |
-
Ответь на вопрос, используя только предоставленный контекст. Если ответ не найден в контексте, скажи "Информация не найдена в базе данных."""
|
| 157 |
|
| 158 |
log_message(f"Отправляю запрос в LLM с {len(reranked_nodes)} узлами")
|
|
|
|
| 159 |
response = query_engine.query(enhanced_question)
|
| 160 |
|
| 161 |
end_time = time.time()
|
|
|
|
| 51 |
doc_id = metadata.get('document_id', 'Неизвестный документ')
|
| 52 |
|
| 53 |
section_info = ""
|
| 54 |
+
if metadata.get('section_id') and metadata.get('section_text'):
|
| 55 |
+
section_info = f"пункт {metadata['section_id']} {metadata['section_text']}"
|
| 56 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
if metadata.get('type') == 'table' and metadata.get('table_number'):
|
| 59 |
table_num = metadata['table_number']
|
|
|
|
| 86 |
metadata = node.metadata if hasattr(node, 'metadata') else {}
|
| 87 |
doc_type = metadata.get('type', 'text')
|
| 88 |
doc_id = metadata.get('document_id', 'unknown')
|
| 89 |
+
section_id = metadata.get('section_id', '') + "" + metadata.get("section_text", "")
|
| 90 |
|
| 91 |
html += f"<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; border-radius: 8px; background-color: #1a202c;'>"
|
| 92 |
|
|
|
|
| 135 |
log_message("Извлекаю релевантные узлы")
|
| 136 |
retrieved_nodes = query_engine.retriever.retrieve(question)
|
| 137 |
log_message(f"Извлечено {len(retrieved_nodes)} узлов")
|
| 138 |
+
for i in range(min(3, len(retrieved_nodes))):
|
| 139 |
+
log_message(f"Пример узла {i+1}: {retrieved_nodes[i].text[:200]}...")
|
| 140 |
|
| 141 |
log_message("Применяю переранжировку")
|
| 142 |
+
reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=15)
|
| 143 |
|
| 144 |
formatted_context = format_context_for_llm(reranked_nodes)
|
| 145 |
+
log_message(f"fорматированный контекст для LLM:\n{formatted_context[:500]}...")
|
| 146 |
|
| 147 |
enhanced_question = f"""
|
| 148 |
Контекст из базы данных:
|
| 149 |
{formatted_context}
|
| 150 |
|
| 151 |
+
Вопрос пользователя: {question}"""
|
|
|
|
| 152 |
|
| 153 |
log_message(f"Отправляю запрос в LLM с {len(reranked_nodes)} узлами")
|
| 154 |
+
log_message(f"Вопрос для LLM:\n{enhanced_question}...")
|
| 155 |
response = query_engine.query(enhanced_question)
|
| 156 |
|
| 157 |
end_time = time.time()
|