Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Files Community

MrSimple07 committed on Aug 17, 2025

Commit

be82b2e

1 Parent(s): 65c4610

added new prompt + table showing improvement

Browse files

Files changed (1) hide show

app.py +41 -32

app.py CHANGED Viewed

@@ -31,7 +31,16 @@ CUSTOM_PROMPT_NEW = """
 История чата:
 {chat_history}
-ОПРЕДЕЛЕНИЕ ТИПА ЗАДАЧИ:
 Проанализируйте запрос пользователя и определите тип задачи:
 1. КРАТКОЕ САММАРИ (ключевые слова: "кратко", "суммировать", "резюме", "основные моменты", "в двух словах"):
@@ -117,20 +126,23 @@ def table_to_document(table_json):
     description = table_json.get("table_description", "")
     headers = table_json.get("headers", [])
-    table_text = f"Таблица {table_json.get('table_number', '')} - {table_json.get('table_title', '')}\n"
-    table_text += f"Документ: {document_id}\n"
-    table_text += f"Раздел: {table_json.get('section', '')}\n"
-    table_text += f"Описание: {description}\n"
     if headers:
-        table_text += f"Заголовки: {' | '.join(headers)}\n"
     data = table_json.get("data", [])
     if data:
-        for row in data:
             if isinstance(row, dict):
                 row_str = " | ".join([f"{k}: {v}" for k,v in row.items()])
-                table_text += f"{row_str}\n"
     return Document(text=table_text, metadata=metadata)
@@ -195,7 +207,6 @@ def download_table_data():
 def improve_query_with_history(question, chat_history_list):
-    """Улучшает запрос с учетом истории чата"""
     try:
         log_message("🔄 Улучшение запроса с учетом истории...")
@@ -205,22 +216,22 @@ def improve_query_with_history(question, chat_history_list):
         history_context = ""
         for i, (user_msg, bot_msg) in enumerate(chat_history_list[-3:], 1):
-            history_context += f"Сообщение {i}:\nПользователь: {user_msg}\nАссистент: {bot_msg}\n\n"
-        improvement_prompt = f"""
-Ты помощник для улучшения поисковых запросов. Проанализируй историю чата и текущий вопрос пользователя.
-История чата:
 {history_context}
-Текущий вопрос: {question}
-Если текущий вопрос неполный или ссылается на что-то из истории (например, "что это значит?", "а это что?", "объясните это"),
-то создай улучшенный запрос, который включает контекст из истории.
-Если вопрос самодостаточный, верни его без изменений.
-Улучшенный запрос:"""
         from llama_index.llms.google_genai import GoogleGenAI
         llm = GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
@@ -315,15 +326,6 @@ def answer_question(question, history):
             bot_response = llm.complete(russian_prompt).text.strip()
             log_message(f"🔄 Исправленный ответ на русском: {bot_response[:100]}...")
-        # Добавляем информацию о документе если есть метаданные
-        if retrieved_nodes and hasattr(retrieved_nodes[0], 'metadata'):
-            metadata = retrieved_nodes[0].metadata
-            document_id = metadata.get('document_id', '')
-            if document_id and document_id != 'unknown':
-                if not bot_response.endswith('.'):
-                    bot_response += '.'
-                bot_response += f"\n\nПодробнее об этом можно узнать в документе {document_id}."
         # Обновляем историю чата
         chat_history.append((question, bot_response))
@@ -451,6 +453,9 @@ def generate_sources_html(nodes):
         unique_docs[doc_id].append(node)
     for doc_id, doc_nodes in unique_docs.items():
         file_link = None
         if chunks_df is not None and 'file_link' in chunks_df.columns:
             doc_rows = chunks_df[chunks_df['document_id'] == doc_id]
@@ -463,10 +468,14 @@ def generate_sources_html(nodes):
         if file_link:
             html += f"<a href='{file_link}' target='_blank' style='color: #68d391; text-decoration: none; font-size: 14px; display: inline-block; margin-bottom: 10px;'>🔗 Ссылка на документ</a><br>"
-        for node in doc_nodes[:3]:
-            metadata = node.metadata if hasattr(node, 'metadata') else {}
-            if 'table_number' in metadata:
-                html += f"<p style='font-size: 12px; color: #a0aec0; margin: 5px 0;'>📊 Таблица {metadata['table_number']}: {metadata.get('table_title', 'Без названия')}</p>"
         html += "</div>"

 История чата:
 {chat_history}
+ИНСТРУКЦИИ ПО ОБРАБОТКЕ КОНТЕКСТА:
+1. АНАЛИЗ ТАБЛИЧНЫХ ДАННЫХ:
+   - Если в контексте есть информация начинающаяся с "Таблица", внимательно изучите её содержимое
+   - Извлекайте данные из строк с заголовками и данными таблицы
+   - Указывайте номер и название таблицы при ответе
+   - Структурируйте ответ на основе табличных данных
+2. ОПРЕДЕЛЕНИЕ ТИПА ЗАДАЧИ:
 Проанализируйте запрос пользователя и определите тип задачи:
 1. КРАТКОЕ САММАРИ (ключевые слова: "кратко", "суммировать", "резюме", "основные моменты", "в двух словах"):
     description = table_json.get("table_description", "")
     headers = table_json.get("headers", [])
+    table_text = f"ТАБЛИЦА: {table_json.get('table_number', '')} - {table_json.get('table_title', '')}\n"
+    table_text += f"ДОКУМЕНТ: {document_id}\n"
+    table_text += f"РАЗДЕЛ: {table_json.get('section', '')}\n"
+    if description:
+        table_text += f"ОПИСАНИЕ: {description}\n"
     if headers:
+        table_text += f"ЗАГОЛОВКИ ТАБЛИЦЫ: {' | '.join(headers)}\n"
     data = table_json.get("data", [])
     if data:
+        table_text += "ДАННЫЕ ТАБЛИЦЫ:\n"
+        for i, row in enumerate(data):
             if isinstance(row, dict):
                 row_str = " | ".join([f"{k}: {v}" for k,v in row.items()])
+                table_text += f"Строка {i+1}: {row_str}\n"
     return Document(text=table_text, metadata=metadata)
 def improve_query_with_history(question, chat_history_list):
     try:
         log_message("🔄 Улучшение запроса с учетом истории...")
         history_context = ""
         for i, (user_msg, bot_msg) in enumerate(chat_history_list[-3:], 1):
+            history_context += f"Вопрос {i}: {user_msg}\nОтвет {i}: {bot_msg[:200]}...\n\n"
+        improvement_prompt = f"""Проанализируй историю диалога и улучши текущий запрос пользователя.
+ИСТОРИЯ ДИАЛОГА:
 {history_context}
+ТЕКУЩИЙ ЗАПРОС: {question}
+ПРАВИЛА:
+1. Если запрос неполный или ссылается на предыдущий контекст (например: "что это", "о чем это", "объясни это"), дополни его информацией из истории
+2. Если запрос самодостаточный, верни его без изменений
+3. Сохраняй ключевые термины и названия документов из истории
+4. Отвечай только улучшенным запросом без дополнительных пояснений
+УЛУЧШЕННЫЙ ЗАПРОС:"""
         from llama_index.llms.google_genai import GoogleGenAI
         llm = GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
             bot_response = llm.complete(russian_prompt).text.strip()
             log_message(f"🔄 Исправленный ответ на русском: {bot_response[:100]}...")
         # Обновляем историю чата
         chat_history.append((question, bot_response))
         unique_docs[doc_id].append(node)
     for doc_id, doc_nodes in unique_docs.items():
+        if doc_id == 'unknown' or doc_id == 'Раздел документа':
+            continue
         file_link = None
         if chunks_df is not None and 'file_link' in chunks_df.columns:
             doc_rows = chunks_df[chunks_df['document_id'] == doc_id]
         if file_link:
             html += f"<a href='{file_link}' target='_blank' style='color: #68d391; text-decoration: none; font-size: 14px; display: inline-block; margin-bottom: 10px;'>🔗 Ссылка на документ</a><br>"
+        table_nodes = [node for node in doc_nodes if 'table_number' in node.metadata]
+        if table_nodes:
+            for node in table_nodes[:3]:
+                metadata = node.metadata
+                table_num = metadata.get('table_number', '')
+                table_title = metadata.get('table_title', 'Без названия')
+                if table_num and table_title != 'Без названия':
+                    html += f"<p style='font-size: 12px; color: #a0aec0; margin: 5px 0;'>📊 {table_num}: {table_title}</p>"
         html += "</div>"