Spaces:
Sleeping
Sleeping
Commit
·
3b55526
1
Parent(s):
ca9fe9c
enhanced the prompt with metadata
Browse files
utils.py
CHANGED
|
@@ -43,6 +43,48 @@ def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingua
|
|
| 43 |
def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
    """Construct a ``CrossEncoder`` for the given model name.

    Args:
        model_name: Model identifier handed to ``CrossEncoder`` unchanged.

    Returns:
        The newly created ``CrossEncoder`` instance.
    """
    reranker = CrossEncoder(model_name)
    return reranker
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def generate_sources_html(nodes, chunks_df=None):
|
| 47 |
html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
|
| 48 |
html += "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>"
|
|
@@ -92,7 +134,6 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 92 |
try:
|
| 93 |
log_message(f"Получен вопрос: {question}")
|
| 94 |
log_message(f"Используется модель: {current_model}")
|
| 95 |
-
log_message(f"Используется промпт: {PROMPT_SIMPLE_POISK[:200]}...")
|
| 96 |
start_time = time.time()
|
| 97 |
|
| 98 |
log_message("Извлекаю релевантные узлы")
|
|
@@ -102,8 +143,19 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 102 |
log_message("Применяю переранжировку")
|
| 103 |
reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=10)
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
log_message(f"Отправляю запрос в LLM с {len(reranked_nodes)} узлами")
|
| 106 |
-
response = query_engine.
|
| 107 |
|
| 108 |
end_time = time.time()
|
| 109 |
processing_time = end_time - start_time
|
|
@@ -114,7 +166,7 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 114 |
|
| 115 |
answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
|
| 116 |
<h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
|
| 117 |
-
<div style='line-height: 1.6; font-size: 16px;'>{response.
|
| 118 |
<div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
|
| 119 |
Время обработки: {processing_time:.2f} секунд
|
| 120 |
</div>
|
|
|
|
| 43 |
def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
    """Construct a ``CrossEncoder`` for the given model name.

    Args:
        model_name: Model identifier handed to ``CrossEncoder`` unchanged.

    Returns:
        The newly created ``CrossEncoder`` instance.
    """
    reranker = CrossEncoder(model_name)
    return reranker
|
| 45 |
|
| 46 |
+
def _with_numero_sign(number):
    """Return *number* as text, prefixed with '№' unless it already starts with it."""
    text = str(number)
    return text if text.startswith('№') else f"№{text}"


def _describe_source_location(metadata):
    """Return a short locator for a node within its document, or '' if none is known."""
    node_type = metadata.get('type')

    # Table and image numbers take priority over any section/page locator.
    if node_type == 'table' and metadata.get('table_number'):
        return f"таблица {_with_numero_sign(metadata['table_number'])}"
    if node_type == 'image' and metadata.get('image_number'):
        return f"рисунок {_with_numero_sign(metadata['image_number'])}"

    # These keys are all rendered with the same "пункт" label; the first
    # non-empty one wins, in this order.
    for key in ('section', 'paragraph', 'clause', 'item'):
        if metadata.get(key):
            return f"пункт {metadata[key]}"

    if metadata.get('page'):
        return f"страница {metadata['page']}"
    return ""


def format_context_for_llm(nodes):
    """Render nodes as source-labelled text blocks for inclusion in an LLM prompt.

    Each node becomes a block of the form::

        [ИСТОЧНИК: <locator> документа <document_id>]
        <node text>

    where the locator is derived from the node's metadata (table/image
    number, then section/paragraph/clause/item, then page). When no locator
    is available the header is ``[ИСТОЧНИК: документ <document_id>]``.

    Args:
        nodes: Iterable of objects that may expose ``metadata`` (a mapping)
            and ``text``. Objects without ``text`` are rendered with
            ``str(node)``. ``None`` is treated as an empty iterable.

    Returns:
        The blocks joined with a newline (each block already ends with one,
        so blocks are separated by a blank line), or ``""`` for no nodes.
    """
    if nodes is None:
        return ""

    blocks = []
    for node in nodes:
        # ``metadata`` may be missing *or* explicitly None; both mean "no metadata".
        metadata = getattr(node, 'metadata', None) or {}

        doc_id = metadata.get('document_id')
        if doc_id is None or doc_id == '':
            # Present-but-empty ids would otherwise render as "None" or blank.
            doc_id = 'Неизвестный документ'

        location = _describe_source_location(metadata)
        body = node.text if hasattr(node, 'text') else str(node)

        if location:
            header = f"[ИСТОЧНИК: {location} документа {doc_id}]"
        else:
            header = f"[ИСТОЧНИК: документ {doc_id}]"
        blocks.append(f"{header}\n{body}\n")

    return "\n".join(blocks)
|
| 87 |
+
|
| 88 |
def generate_sources_html(nodes, chunks_df=None):
|
| 89 |
html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
|
| 90 |
html += "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>"
|
|
|
|
| 134 |
try:
|
| 135 |
log_message(f"Получен вопрос: {question}")
|
| 136 |
log_message(f"Используется модель: {current_model}")
|
|
|
|
| 137 |
start_time = time.time()
|
| 138 |
|
| 139 |
log_message("Извлекаю релевантные узлы")
|
|
|
|
| 143 |
log_message("Применяю переранжировку")
|
| 144 |
reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=10)
|
| 145 |
|
| 146 |
+
formatted_context = format_context_for_llm(reranked_nodes)
|
| 147 |
+
|
| 148 |
+
enhanced_question = f"""
|
| 149 |
+
Контекст из базы данных:
|
| 150 |
+
{formatted_context}
|
| 151 |
+
|
| 152 |
+
Вопрос пользователя: {question}
|
| 153 |
+
|
| 154 |
+
Инструкции: Отвечай строго на основе предоставленного контекста. Обязательно указывай источники в формате "по пункту X документа Y" или "согласно разделу X документа Y". Если информация из нескольких источников, перечисли их все.
|
| 155 |
+
"""
|
| 156 |
+
|
| 157 |
log_message(f"Отправляю запрос в LLM с {len(reranked_nodes)} узлами")
|
| 158 |
+
response = query_engine._llm.complete(enhanced_question)
|
| 159 |
|
| 160 |
end_time = time.time()
|
| 161 |
processing_time = end_time - start_time
|
|
|
|
| 166 |
|
| 167 |
answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
|
| 168 |
<h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
|
| 169 |
+
<div style='line-height: 1.6; font-size: 16px;'>{response.text}</div>
|
| 170 |
<div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
|
| 171 |
Время обработки: {processing_time:.2f} секунд
|
| 172 |
</div>
|