Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Files Community

MrSimple07 committed on Aug 17, 2025

Commit

df86177

1 Parent(s): d6c8aaf

fixed errors - prompt improvement + json fix document column

Browse files

Files changed (1) hide show

app.py +72 -33

app.py CHANGED Viewed

@@ -26,6 +26,8 @@ GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
 CUSTOM_PROMPT_NEW = """
 Вы являетесь высокоспециализированным Ассистентом для анализа документов (AIEXP). Ваша цель - предоставлять точные, корректные и контекстно релевантные ответы на основе анализа нормативной документации (НД). Все ваши ответы должны основываться исключительно на предоставленном контексте без использования внешних знаний или предположений.
 История чата:
 {chat_history}
@@ -105,25 +107,32 @@ def log_message(message):
     sys.stdout.flush()
 def table_to_document(table_json):
     metadata = {
-        "document_id": table_json["document_id"],
-        "section": table_json["section"],
-        "table_number": table_json["table_number"],
-        "table_title": table_json["table_title"],
     }
-    description = table_json["table_description"]
-    headers = " | ".join(table_json["headers"])
-    rows = []
-    for row in table_json["data"]:
-        row_str = " | ".join([f"{k}: {v}" for k,v in row.items()])
-        rows.append(row_str)
-    table_text = f"Таблица {table_json['table_number']} - {table_json['table_title']}\n"
     table_text += f"Описание: {description}\n"
-    table_text += f"Заголовки: {headers}\n"
-    table_text += "\n".join(rows)
     return Document(text=table_text, metadata=metadata)
@@ -144,6 +153,8 @@ def download_table_data():
         table_documents = []
         for file_path in table_files:
             try:
                 local_path = hf_hub_download(
                     repo_id=REPO_ID,
                     filename=file_path,
@@ -155,13 +166,23 @@ def download_table_data():
                 with open(local_path, 'r', encoding='utf-8') as f:
                     table_data = json.load(f)
-                    if isinstance(table_data, list):
                         for table_json in table_data:
                             doc = table_to_document(table_json)
                             table_documents.append(doc)
-                    else:
-                        doc = table_to_document(table_data)
-                        table_documents.append(doc)
             except Exception as e:
                 log_message(f"❌ Ошибка обработки файла {file_path}: {str(e)}")
@@ -212,6 +233,7 @@ def initialize_models():
         index_faiss = faiss.read_index(faiss_index_path)
         chunks_df = pd.read_csv(chunks_csv_path)
         log_message(f"📄 Загружено {len(chunks_df)} основных чанков")
         table_documents = download_table_data()
@@ -231,7 +253,8 @@ def initialize_models():
         if text_column is None:
             text_column = chunks_df.columns[0]
-        log_message("📝 Создание документов из чанков...")
         documents = []
         for i, (_, row) in enumerate(chunks_df.iterrows()):
             doc = Document(
@@ -282,16 +305,30 @@ def answer_question(question, history):
     try:
         start_time = time.time()
         chat_history_text = format_chat_history()
         query_with_history = question
-        response = query_engine.query(query_with_history)
         retrieved_nodes = query_engine.retriever.retrieve(query_with_history)
         end_time = time.time()
         processing_time = end_time - start_time
         bot_response = response.response
         chat_history.append((question, bot_response))
         if len(chat_history) > 10:
@@ -307,6 +344,7 @@ def answer_question(question, history):
     except Exception as e:
         error_msg = f"❌ Ошибка обработки вопроса: {str(e)}"
         history.append([question, error_msg])
         return history, ""
@@ -348,8 +386,16 @@ def generate_sources_html(nodes):
 def clear_chat():
     global chat_history
     chat_history = []
     return [], ""
 def create_demo_interface():
     with gr.Blocks(title="AIEXP - AI Expert для нормативной документации", theme=gr.themes.Soft()) as demo:
@@ -397,22 +443,15 @@ def create_demo_interface():
                         value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Здесь появятся источники...</div>",
                     )
-            def user_message(message, history):
-                return "", history + [[message, None]]
-            def bot_message(history):
-                if history and history[-1][1] is None:
-                    user_msg = history[-1][0]
-                    updated_history, sources = answer_question(user_msg, history[:-1])
-                    return updated_history, sources
-                return history, ""
-            msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
-                bot_message, chatbot, [chatbot, sources_output]
             )
-            send_btn.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
-                bot_message, chatbot, [chatbot, sources_output]
             )
             clear_btn.click(clear_chat, outputs=[chatbot, sources_output])

 CUSTOM_PROMPT_NEW = """
 Вы являетесь высокоспециализированным Ассистентом для анализа документов (AIEXP). Ваша цель - предоставлять точные, корректные и контекстно релевантные ответы на основе анализа нормативной документации (НД). Все ваши ответы должны основываться исключительно на предоставленном контексте без использования внешних знаний или предположений.
+ВАЖНО: ВСЕ ОТВЕТЫ ДОЛЖНЫ БЫТЬ ТОЛЬКО НА РУССКОМ ЯЗЫКЕ!
 История чата:
 {chat_history}
     sys.stdout.flush()
 def table_to_document(table_json):
     """Convert one table JSON object into a ``Document`` for indexing.

     The rendered text is a header block (table number and title, document,
     section, description), an optional headers line, and then one line per
     data row.

     Args:
         table_json: Mapping describing a table. Keys read here are
             ``document_id`` (falling back to ``document``), ``section``,
             ``table_number``, ``table_title``, ``table_description``,
             ``headers`` and ``data``. Every key is optional.

     Returns:
         A ``Document`` whose text is the rendered table and whose metadata
         holds ``document_id``, ``section``, ``table_number`` and
         ``table_title``.
     """
     # Try both key spellings and fall back to "unknown" even when a key is
     # present but null/empty, so the text never reads "Документ: None".
     document_id = (
         table_json.get("document_id")
         or table_json.get("document")
         or "unknown"
     )
     section = table_json.get("section", "")
     table_number = table_json.get("table_number", "")
     table_title = table_json.get("table_title", "")
     metadata = {
         "document_id": document_id,
         "section": section,
         "table_number": table_number,
         "table_title": table_title,
     }

     lines = [
         f"Таблица {table_number} - {table_title}",
         f"Документ: {document_id}",
         f"Раздел: {section}",
         f"Описание: {table_json.get('table_description', '')}",
     ]

     headers = table_json.get("headers") or []
     if headers:
         # str() each header: numeric column names would break str.join.
         header_line = " | ".join(map(str, headers))
         lines.append(f"Заголовки: {header_line}")

     for row in table_json.get("data") or []:
         if isinstance(row, dict):
             lines.append(" | ".join(f"{k}: {v}" for k, v in row.items()))
         elif isinstance(row, (list, tuple)):
             # Positional rows carry data too; keep them instead of
             # silently dropping them from the indexed text.
             lines.append(" | ".join(map(str, row)))

     # Every line, including the last, is newline-terminated.
     table_text = "\n".join(lines) + "\n"
     return Document(text=table_text, metadata=metadata)
         table_documents = []
         for file_path in table_files:
             try:
+                log_message(f"🔄 Обработка файла: {file_path}")
                 local_path = hf_hub_download(
                     repo_id=REPO_ID,
                     filename=file_path,
                 with open(local_path, 'r', encoding='utf-8') as f:
                     table_data = json.load(f)
+                    log_message(f"📋 Структура JSON: {list(table_data.keys()) if isinstance(table_data, dict) else 'Список'}")
+                    if isinstance(table_data, dict):
+                        if 'sheets' in table_data:
+                            for sheet in table_data['sheets']:
+                                doc = table_to_document(sheet)
+                                table_documents.append(doc)
+                                log_message(f"✅ Создан документ из таблицы: {sheet.get('table_number', 'unknown')}")
+                        else:
+                            doc = table_to_document(table_data)
+                            table_documents.append(doc)
+                            log_message(f"✅ Создан документ из JSON объекта")
+                    elif isinstance(table_data, list):
                         for table_json in table_data:
                             doc = table_to_document(table_json)
                             table_documents.append(doc)
+                            log_message(f"✅ Создан документ из элемента списка")
             except Exception as e:
                 log_message(f"❌ Ошибка обработки файла {file_path}: {str(e)}")
         index_faiss = faiss.read_index(faiss_index_path)
         chunks_df = pd.read_csv(chunks_csv_path)
         log_message(f"📄 Загружено {len(chunks_df)} основных чанков")
+        log_message(f"📋 Колонки в chunks_df: {list(chunks_df.columns)}")
         table_documents = download_table_data()
         if text_column is None:
             text_column = chunks_df.columns[0]
+        log_message(f"📝 Используется колонка для текста: {text_column}")
         documents = []
         for i, (_, row) in enumerate(chunks_df.iterrows()):
             doc = Document(
     try:
         start_time = time.time()
+        log_message(f"🔍 Получен вопрос: {question}")
         chat_history_text = format_chat_history()
+        log_message(f"📜 История чата: {len(chat_history)} сообщений")
         query_with_history = question
+        log_message("🔎 Поиск релевантных чанков...")
         retrieved_nodes = query_engine.retriever.retrieve(query_with_history)
+        log_message(f"📊 Найдено {len(retrieved_nodes)} релевантных чанков")
+        for i, node in enumerate(retrieved_nodes[:3]):
+            log_message(f"📄 Чанк {i+1}: {node.text[:100]}...")
+            log_message(f"🏷️ Метаданные: {node.metadata}")
+        log_message("🤖 Отправка запроса в LLM...")
+        response = query_engine.query(query_with_history)
         end_time = time.time()
         processing_time = end_time - start_time
         bot_response = response.response
+        log_message(f"✅ Получен ответ: {bot_response[:100]}...")
         chat_history.append((question, bot_response))
         if len(chat_history) > 10:
     except Exception as e:
         error_msg = f"❌ Ошибка обработки вопроса: {str(e)}"
+        log_message(f"❌ Ошибка: {str(e)}")
         history.append([question, error_msg])
         return history, ""
 def clear_chat():
     """Drop the stored chat history and return blank values for the UI.

     Returns:
         A pair ``([], "")``; the clear button wires these to the chatbot
         and the sources panel respectively.
     """
     global chat_history
     chat_history = list()
     log_message("🗑️ История чата очищена")
     empty_chat, empty_sources = [], ""
     return empty_chat, empty_sources
def handle_submit(message, history):
    """Answer a submitted chat message, ignoring blank input.

    Args:
        message: Text from the input box. ``None``, an empty string, or
            whitespace-only text is treated as "nothing submitted".
        history: Current chat history as shown in the chatbot.

    Returns:
        A ``(history, sources)`` pair: the result of ``answer_question``
        for a real message, or the unchanged ``history`` with an empty
        sources string for blank input.
    """
    # Check for None before calling .strip(); a missing value would
    # otherwise raise AttributeError instead of being ignored.
    if not message or not message.strip():
        return history, ""
    updated_history, sources = answer_question(message, history)
    return updated_history, sources
 def create_demo_interface():
     with gr.Blocks(title="AIEXP - AI Expert для нормативной документации", theme=gr.themes.Soft()) as demo:
                         value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Здесь появятся источники...</div>",
                     )
+            def submit_message(message, history):
+                return handle_submit(message, history)
+            msg.submit(submit_message, [msg, chatbot], [chatbot, sources_output]).then(
+                lambda: "", None, msg
             )
+            send_btn.click(submit_message, [msg, chatbot], [chatbot, sources_output]).then(
+                lambda: "", None, msg
             )
             clear_btn.click(clear_chat, outputs=[chatbot, sources_output])