RAG_AIEXP_01 / chat_handler.py
MrSimple07's picture
added text_json files + added the document prep
600d58a
raw
history blame
8.5 kB
import html
import logging
import time
logger = logging.getLogger(__name__)
def log_message(message):
    """Record *message* on the module logger (INFO) and echo it to stdout.

    The stdout copy is flushed immediately so the line shows up right away
    even when output is buffered.
    """
    logger.info(message)
    print(message, flush=True)
class ChatHandler:
    """Answer questions through an index retriever and render results as HTML.

    The handler keeps an in-memory list of answered questions
    (``chat_history``) and produces three HTML fragments: the answer card,
    the list of retrieved sources, and the recent-history view.

    Every dynamic value (question, answer, model name, node metadata,
    exception text) is HTML-escaped before it is placed into markup, so text
    containing ``<``, ``>`` or ``&`` can neither break the layout nor inject
    tags.
    """

    # Metadata keys forming the section breadcrumb, outermost level first.
    _SECTION_KEYS = (
        'section_id',
        'subsection_id',
        'sub_subsection_id',
        'sub_sub_subsection_id',
    )
    # How many of the most recent history entries get_history_html() shows.
    _HISTORY_LIMIT = 10
    # Maximum number of answer characters shown per history entry.
    _ANSWER_PREVIEW_CHARS = 300

    def __init__(self, index_retriever):
        """Store the retriever used for answering and start with empty history.

        Args:
            index_retriever: object providing ``is_initialized()``,
                ``get_current_model()``, ``retrieve_nodes(question)`` and a
                ``query_engine`` with ``query(question)``.
        """
        self.index_retriever = index_retriever
        self.chat_history = []

    @staticmethod
    def _esc(value):
        """Return ``str(value)`` with HTML special characters escaped."""
        return html.escape(str(value))

    @staticmethod
    def _error_box(message_html):
        """Wrap already-safe HTML *message_html* in the red error card."""
        return (
            "<div style='background-color: #e53e3e; color: white; padding: 20px; "
            f"border-radius: 10px;'>{message_html}</div>"
        )

    @staticmethod
    def _format_number(number):
        """Return *number* as text prefixed with '№', or '' if missing/unknown."""
        if not number or number == 'unknown':
            return ""
        # str() first: the number may be stored as an int in the metadata.
        text = str(number)
        return text if text.startswith('№') else f"№{text}"

    def format_section_path(self, metadata):
        """Build a ' → '-separated breadcrumb from the section ids in *metadata*.

        Missing, empty and ``'Unknown'`` levels are skipped.

        Args:
            metadata: mapping that may contain the ``*section_id`` keys.

        Returns:
            The breadcrumb as plain (unescaped) text, or ``"Основной раздел"``
            when no level is present.
        """
        parts = []
        for key in self._SECTION_KEYS:
            value = metadata.get(key)
            if value and value != 'Unknown':
                # Coerce so numeric ids do not break str.join().
                parts.append(str(value))
        return " → ".join(parts) if parts else "Основной раздел"

    def _numbered_source_html(self, metadata, number_key, title, icon, color, doc_id):
        """Render the heading and optional section line of a table/image source.

        *doc_id* must already be HTML-escaped; values read from *metadata* are
        escaped here.
        """
        number = self._esc(self._format_number(metadata.get(number_key, 'unknown')))
        if number:
            heading = f"{icon} {title} {number} - {doc_id}"
        else:
            heading = f"{icon} {title} - {doc_id}"
        pieces = [f"<h4 style='margin: 0 0 10px 0; color: {color};'>{heading}</h4>"]
        section = metadata.get('section', '')
        if section:
            pieces.append(
                f"<div style='color: {color}; font-size: 14px;'>📍 {self._esc(section)}</div>"
            )
        return "".join(pieces)

    def generate_sources_html(self, nodes):
        """Render the retrieved *nodes* as a scrollable HTML list of source cards.

        Each node's ``metadata`` mapping (treated as empty when absent or
        ``None``) selects the card layout through its ``type`` key: ``'text'``
        (the default), ``'table'`` or ``'image'``. Any other type gets a card
        showing only the document id.

        Args:
            nodes: iterable of retrieved nodes.

        Returns:
            An HTML string.
        """
        chunks = [
            "<div style='background-color: #2d3748; color: white; padding: 20px; "
            "border-radius: 10px; max-height: 400px; overflow-y: auto;'>",
            "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>",
        ]
        for node in nodes:
            metadata = getattr(node, 'metadata', None) or {}
            doc_type = metadata.get('type', 'text')
            raw_doc_id = metadata.get('document_id', 'unknown')
            doc_id = self._esc(raw_doc_id)

            chunks.append(
                "<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; "
                "border-radius: 8px; background-color: #1a202c;'>"
            )
            if doc_type == 'text':
                section_path = self._esc(self.format_section_path(metadata))
                document_name = self._esc(metadata.get('document_name', raw_doc_id))
                level = self._esc(metadata.get('level', 'section'))
                chunks.append(f"<h4 style='margin: 0 0 10px 0; color: #63b3ed;'>📄 {doc_id}</h4>")
                chunks.append(
                    f"<div style='color: #a0aec0; font-size: 13px; margin-bottom: 8px;'>{document_name}</div>"
                )
                chunks.append(
                    f"<div style='color: #68d391; font-size: 14px; margin-bottom: 5px;'>📍 {section_path}</div>"
                )
                chunks.append(f"<div style='color: #fbb6ce; font-size: 12px;'>Уровень: {level}</div>")
            elif doc_type == 'table':
                chunks.append(
                    self._numbered_source_html(
                        metadata, 'table_number', "Таблица", "📊", "#68d391", doc_id
                    )
                )
            elif doc_type == 'image':
                chunks.append(
                    self._numbered_source_html(
                        metadata, 'image_number', "Изображение", "🖼️", "#fbb6ce", doc_id
                    )
                )
            else:
                # Unrecognised type: still identify the document instead of
                # emitting an empty card.
                chunks.append(f"<h4 style='margin: 0 0 10px 0; color: #63b3ed;'>{doc_id}</h4>")
            chunks.append("</div>")
        chunks.append("</div>")
        return "".join(chunks)

    def answer_question(self, question):
        """Answer *question* and return ``(answer_html, sources_html)``.

        On any failure (retriever not initialised, nothing retrieved, or an
        exception while querying) the first element is a red error card and
        the second is an empty string. Successful answers are appended to
        ``chat_history``.

        Args:
            question: the user's question text.

        Returns:
            A 2-tuple of HTML strings.
        """
        if not self.index_retriever.is_initialized():
            return self._error_box("Система не инициализирована"), ""

        # Top-level UI boundary: any failure is logged and shown to the user
        # rather than propagated.
        try:
            log_message(f"Получен вопрос: {question}")
            current_model = self.index_retriever.get_current_model()
            log_message(f"Используется модель: {current_model}")

            start_time = time.time()
            retrieved_nodes = self.index_retriever.retrieve_nodes(question)
            if not retrieved_nodes:
                return self._error_box("Не удалось найти релевантные документы"), ""

            log_message(f"Отправляю запрос в LLM с {len(retrieved_nodes)} узлами")
            # NOTE(review): the query engine receives only the question, so it
            # presumably performs its own retrieval; retrieved_nodes is used
            # here for display and counting only — confirm the two agree.
            response = self.index_retriever.query_engine.query(question)
            processing_time = time.time() - start_time
            log_message(f"Обработка завершена за {processing_time:.2f} секунд")

            # Normalise to str so the history view can slice/measure it safely.
            raw_answer = response.response
            answer_text = "" if raw_answer is None else str(raw_answer)

            self.chat_history.append({
                "question": question,
                "answer": answer_text,
                "model": current_model,
                "processing_time": processing_time,
                "nodes_count": len(retrieved_nodes),
            })

            sources_html = self.generate_sources_html(retrieved_nodes)
            answer_with_time = "\n".join([
                "<div style='background-color: #2d3748; color: white; padding: 20px; "
                "border-radius: 10px; margin-bottom: 10px;'>",
                f"<h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {self._esc(current_model)}):</h3>",
                f"<div style='line-height: 1.6; font-size: 16px;'>{self._esc(answer_text)}</div>",
                "<div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; "
                "font-size: 14px; color: #a0aec0;'>",
                f"Время обработки: {processing_time:.2f} секунд | Источников: {len(retrieved_nodes)}",
                "</div>",
                "</div>",
            ])
            return answer_with_time, sources_html
        except Exception as e:
            log_message(f"Ошибка обработки вопроса: {str(e)}")
            return self._error_box(f"Ошибка обработки вопроса: {self._esc(e)}"), ""

    def get_chat_history(self):
        """Return the list of history entries (the live list, not a copy)."""
        return self.chat_history

    def clear_history(self):
        """Drop all history entries and log that the history was cleared."""
        self.chat_history = []
        log_message("История чата очищена")

    def get_history_html(self):
        """Render the most recent history entries, newest first, as HTML.

        At most ``_HISTORY_LIMIT`` entries are shown; each answer is cut to
        ``_ANSWER_PREVIEW_CHARS`` characters with a trailing ``...`` when
        longer. Entries are numbered by their position in the full history.

        Returns:
            An HTML string; a placeholder card when the history is empty.
        """
        if not self.chat_history:
            return (
                "<div style='background-color: #2d3748; color: white; padding: 20px; "
                "border-radius: 10px; text-align: center;'>История пуста</div>"
            )

        total = len(self.chat_history)
        recent = self.chat_history[-self._HISTORY_LIMIT:]
        chunks = [
            "<div style='background-color: #2d3748; color: white; padding: 20px; "
            "border-radius: 10px; max-height: 500px; overflow-y: auto;'>",
            "<h3 style='color: #63b3ed; margin-top: 0;'>История чата:</h3>",
        ]
        for offset, entry in enumerate(reversed(recent)):
            answer = entry['answer']
            answer = "" if answer is None else str(answer)
            # Truncate the raw text first, then escape, so an HTML entity is
            # never cut in half.
            preview = self._esc(answer[:self._ANSWER_PREVIEW_CHARS])
            ellipsis = '...' if len(answer) > self._ANSWER_PREVIEW_CHARS else ''

            chunks.append(
                "<div style='margin-bottom: 20px; padding: 15px; border: 1px solid #4a5568; "
                "border-radius: 8px; background-color: #1a202c;'>"
            )
            chunks.append(
                f"<div style='color: #68d391; font-weight: bold; margin-bottom: 8px;'>Вопрос {total - offset}:</div>"
            )
            chunks.append(
                f"<div style='margin-bottom: 10px; font-size: 14px;'>{self._esc(entry['question'])}</div>"
            )
            chunks.append(
                f"<div style='color: #63b3ed; font-weight: bold; margin-bottom: 8px;'>Ответ ({self._esc(entry['model'])}):</div>"
            )
            chunks.append(
                f"<div style='margin-bottom: 10px; font-size: 14px; line-height: 1.4;'>{preview}{ellipsis}</div>"
            )
            chunks.append(
                f"<div style='color: #a0aec0; font-size: 12px;'>Время: {entry['processing_time']:.2f}с | Источников: {entry['nodes_count']}</div>"
            )
            chunks.append("</div>")
        chunks.append("</div>")
        return "".join(chunks)