Spaces:
Sleeping
Sleeping
Commit
·
d6c8aaf
1
Parent(s):
1f55313
new app py with chat history + json files
Browse files
app.py
CHANGED
|
@@ -1,186 +1,386 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import time
|
| 3 |
import sys
|
| 4 |
-
from llama_index.llms.google_genai import GoogleGenAI
|
| 5 |
-
from llama_index.core import Settings
|
| 6 |
from config import *
|
| 7 |
-
from document_processor import *
|
| 8 |
-
from llama_index.core.chat_engine import CondensePlusContextChatEngine
|
| 9 |
-
import faiss
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
query_engine = None
|
| 13 |
chunks_df = None
|
| 14 |
-
chat_engine = None
|
| 15 |
chat_history = []
|
| 16 |
|
| 17 |
-
def
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
|
|
|
| 23 |
try:
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
response_synthesizer = get_response_synthesizer(
|
| 29 |
response_mode=ResponseMode.TREE_SUMMARIZE,
|
| 30 |
text_qa_template=custom_prompt_template
|
| 31 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
| 42 |
|
| 43 |
end_time = time.time()
|
| 44 |
processing_time = end_time - start_time
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
sources_html = generate_sources_html(retrieved_nodes)
|
| 47 |
|
| 48 |
-
|
| 49 |
-
<h3 style='color: #63b3ed; margin-top: 0;'>📋 Answer:</h3>
|
| 50 |
-
<div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
|
| 51 |
-
<div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
|
| 52 |
-
⏱️ Processing time: {processing_time:.2f} sec
|
| 53 |
-
</div>
|
| 54 |
-
</div>"""
|
| 55 |
|
| 56 |
-
|
| 57 |
-
if len(new_history) > 6:
|
| 58 |
-
new_history = new_history[-6:]
|
| 59 |
|
| 60 |
-
return
|
| 61 |
|
| 62 |
except Exception as e:
|
| 63 |
-
error_msg = f"
|
| 64 |
-
|
| 65 |
-
|
| 66 |
|
| 67 |
def generate_sources_html(nodes):
|
| 68 |
html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
|
| 69 |
-
html += "<h3 style='color: #63b3ed; margin-top: 0;'>📚
|
| 70 |
|
| 71 |
unique_docs = {}
|
| 72 |
for node in nodes:
|
| 73 |
metadata = node.metadata if hasattr(node, 'metadata') else {}
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
unique_docs[doc_key] = []
|
| 79 |
-
unique_docs[doc_key].append(node)
|
| 80 |
|
| 81 |
-
for
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
html += f"<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; border-radius: 8px; background-color: #1a202c;'>"
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
html += f"<
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
html += "</div>"
|
| 90 |
|
| 91 |
html += "</div>"
|
| 92 |
return html
|
| 93 |
|
| 94 |
-
def
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
return "<div style='padding: 20px; text-align: center; color: #666;'>No documents in system yet</div>"
|
| 99 |
-
|
| 100 |
-
html = f"<div style='background-color: #f8f9fa; padding: 20px; border-radius: 10px;'>"
|
| 101 |
-
html += f"<h3 style='color: #2d3748; margin-top: 0;'>📚 {len(documents)} documents in the system:</h3>"
|
| 102 |
-
html += "<div style='max-height: 400px; overflow-y: auto;'>"
|
| 103 |
-
|
| 104 |
-
for i, doc_name in enumerate(documents, 1):
|
| 105 |
-
html += f"<div style='padding: 8px; margin: 5px 0; background-color: white; border-radius: 5px; border-left: 4px solid #63b3ed;'>"
|
| 106 |
-
html += f"{i}. {doc_name}"
|
| 107 |
-
html += "</div>"
|
| 108 |
-
|
| 109 |
-
html += "</div></div>"
|
| 110 |
-
return html
|
| 111 |
-
|
| 112 |
-
def upload_and_process_file(files, doc_names, doc_links):
|
| 113 |
-
global query_engine, chunks_df, chat_engine
|
| 114 |
-
|
| 115 |
-
if not files:
|
| 116 |
-
return "No files selected", get_documents_display()
|
| 117 |
-
|
| 118 |
-
if len(files) != len(doc_names) or len(files) != len(doc_links):
|
| 119 |
-
return "Error: Number of files must match number of document names and links", get_documents_display()
|
| 120 |
-
|
| 121 |
-
existing_docs = get_existing_documents()
|
| 122 |
-
results = []
|
| 123 |
-
|
| 124 |
-
for i, file in enumerate(files):
|
| 125 |
-
doc_name = doc_names[i].strip() if i < len(doc_names) else ""
|
| 126 |
-
doc_link = doc_links[i].strip() if i < len(doc_links) else ""
|
| 127 |
-
|
| 128 |
-
if not doc_name:
|
| 129 |
-
doc_name = file.name.split('/')[-1].replace('.txt', '').replace('.pdf', '')
|
| 130 |
-
|
| 131 |
-
# Check if document already exists
|
| 132 |
-
if doc_name in existing_docs:
|
| 133 |
-
results.append(f"⚠️ {doc_name}: Document already exists in the system")
|
| 134 |
-
continue
|
| 135 |
-
|
| 136 |
-
log_message(f"🔄 Starting processing of file {i+1}/{len(files)}: {file.name}")
|
| 137 |
-
|
| 138 |
-
file_info, error = process_uploaded_file(file.name, file.name.split('/')[-1], doc_name, doc_link)
|
| 139 |
-
|
| 140 |
-
if error:
|
| 141 |
-
results.append(f"❌ {file.name.split('/')[-1]}: {error}")
|
| 142 |
-
continue
|
| 143 |
-
|
| 144 |
-
query_engine, chunks_df, error = add_to_vector_index(file_info['chunks'], file_info, chunks_df)
|
| 145 |
-
|
| 146 |
-
if error:
|
| 147 |
-
results.append(f"❌ {file_info['file_name']}: Error adding to database - {error}")
|
| 148 |
-
else:
|
| 149 |
-
results.append(f"✅ {file_info['document']}: Successfully processed and added to database")
|
| 150 |
-
log_message(f"✅ Completed processing: {file_info['document']}")
|
| 151 |
-
# Reset chat engine to include new documents
|
| 152 |
-
chat_engine = None
|
| 153 |
-
|
| 154 |
-
return "\n".join(results), get_documents_display()
|
| 155 |
|
| 156 |
-
def
|
| 157 |
-
with gr.Blocks(title="AIEXP - AI Expert
|
| 158 |
|
| 159 |
gr.Markdown("""
|
| 160 |
# AIEXP - Artificial Intelligence Expert
|
| 161 |
|
| 162 |
-
##
|
| 163 |
""")
|
| 164 |
|
| 165 |
-
with gr.Tab("
|
| 166 |
-
gr.Markdown("###
|
| 167 |
|
| 168 |
with gr.Row():
|
| 169 |
-
with gr.Column(scale=
|
| 170 |
chatbot = gr.Chatbot(
|
| 171 |
-
label="
|
| 172 |
-
height=
|
| 173 |
-
|
| 174 |
-
type="messages"
|
| 175 |
)
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
gr.Examples(
|
| 186 |
examples=[
|
|
@@ -188,141 +388,49 @@ def create_interface():
|
|
| 188 |
"Кто несет ответственность за организацию и проведение признания протоколов испытаний продукции?",
|
| 189 |
"В каких случаях могут быть признаны протоколы испытаний, проведенные лабораториями, не включенными в перечисления?",
|
| 190 |
],
|
| 191 |
-
inputs=
|
| 192 |
)
|
| 193 |
-
|
| 194 |
with gr.Column(scale=1):
|
| 195 |
-
answer_output = gr.HTML(
|
| 196 |
-
label="",
|
| 197 |
-
value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>The answer to your question will appear here...</div>",
|
| 198 |
-
)
|
| 199 |
-
|
| 200 |
sources_output = gr.HTML(
|
| 201 |
-
label="",
|
| 202 |
-
value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'
|
| 203 |
)
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
inputs=[question_input, chatbot],
|
| 208 |
-
outputs=[answer_output, sources_output, chatbot]
|
| 209 |
-
).then(
|
| 210 |
-
lambda: "", inputs=None, outputs=question_input
|
| 211 |
-
)
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
)
|
| 220 |
|
| 221 |
-
|
| 222 |
-
|
| 223 |
)
|
| 224 |
-
|
| 225 |
-
with gr.Tab("📚 Document Management"):
|
| 226 |
-
gr.Markdown("### Document database and adding new files")
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
documents_display = gr.HTML(
|
| 231 |
-
label="Document list",
|
| 232 |
-
value=get_documents_display()
|
| 233 |
-
)
|
| 234 |
-
|
| 235 |
-
refresh_btn = gr.Button("🔄 Refresh List", variant="secondary")
|
| 236 |
-
|
| 237 |
-
with gr.Column(scale=1):
|
| 238 |
-
gr.Markdown("#### Upload new documents")
|
| 239 |
-
gr.Markdown("Supported formats: PDF, TXT")
|
| 240 |
-
|
| 241 |
-
file_upload = gr.File(
|
| 242 |
-
file_count="multiple",
|
| 243 |
-
file_types=[".pdf", ".txt"],
|
| 244 |
-
label="Select files to upload"
|
| 245 |
-
)
|
| 246 |
-
|
| 247 |
-
doc_names_input = gr.Textbox(
|
| 248 |
-
label="Document names (one per line)",
|
| 249 |
-
placeholder="Enter document names, one per line...",
|
| 250 |
-
lines=5
|
| 251 |
-
)
|
| 252 |
-
|
| 253 |
-
doc_links_input = gr.Textbox(
|
| 254 |
-
label="Document links (one per line)",
|
| 255 |
-
placeholder="Enter document links, one per line...",
|
| 256 |
-
lines=5
|
| 257 |
-
)
|
| 258 |
-
|
| 259 |
-
upload_btn = gr.Button("📤 Upload and Process", variant="primary")
|
| 260 |
-
|
| 261 |
-
upload_status = gr.Textbox(
|
| 262 |
-
label="Upload status",
|
| 263 |
-
lines=8,
|
| 264 |
-
max_lines=10,
|
| 265 |
-
interactive=False
|
| 266 |
-
)
|
| 267 |
-
|
| 268 |
-
def process_names_and_links(names_text, links_text):
|
| 269 |
-
names = [name.strip() for name in names_text.split('\n') if name.strip()]
|
| 270 |
-
links = [link.strip() for link in links_text.split('\n') if link.strip()]
|
| 271 |
-
return names, links
|
| 272 |
-
|
| 273 |
-
upload_btn.click(
|
| 274 |
-
fn=lambda files, names, links: upload_and_process_file(
|
| 275 |
-
files,
|
| 276 |
-
*process_names_and_links(names, links)
|
| 277 |
-
),
|
| 278 |
-
inputs=[file_upload, doc_names_input, doc_links_input],
|
| 279 |
-
outputs=[upload_status, documents_display]
|
| 280 |
)
|
| 281 |
|
| 282 |
-
|
| 283 |
-
fn=lambda: get_documents_display(),
|
| 284 |
-
outputs=[documents_display]
|
| 285 |
-
)
|
| 286 |
|
| 287 |
return demo
|
| 288 |
|
| 289 |
if __name__ == "__main__":
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
Settings.llm = llm
|
| 296 |
-
|
| 297 |
-
# Initialize system
|
| 298 |
-
query_engine, chunks_df, success = initialize_system()
|
| 299 |
-
|
| 300 |
-
log_message("🌟 Starting web interface...")
|
| 301 |
-
demo = create_interface()
|
| 302 |
-
|
| 303 |
-
# Launch regardless of initialization success
|
| 304 |
demo.launch(
|
| 305 |
server_name="0.0.0.0",
|
| 306 |
server_port=7860,
|
| 307 |
share=True,
|
| 308 |
-
debug=False
|
| 309 |
-
show_error=True
|
| 310 |
)
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
# Create minimal interface even if there's an error
|
| 315 |
-
import gradio as gr
|
| 316 |
-
|
| 317 |
-
def error_interface():
|
| 318 |
-
with gr.Blocks() as demo:
|
| 319 |
-
gr.Markdown(f"# Error: {str(e)}")
|
| 320 |
-
gr.Markdown("Please check your configuration and try again.")
|
| 321 |
-
return demo
|
| 322 |
-
|
| 323 |
-
error_demo = error_interface()
|
| 324 |
-
error_demo.launch(
|
| 325 |
-
server_name="0.0.0.0",
|
| 326 |
-
server_port=7860,
|
| 327 |
-
share=True
|
| 328 |
-
)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from huggingface_hub import hf_hub_download
|
| 3 |
+
import faiss
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import os
|
| 6 |
+
import json
|
| 7 |
+
from llama_index.core import Document, VectorStoreIndex, Settings
|
| 8 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 9 |
+
from llama_index.llms.google_genai import GoogleGenAI
|
| 10 |
+
from llama_index.core.query_engine import RetrieverQueryEngine
|
| 11 |
+
from llama_index.core.retrievers import VectorIndexRetriever
|
| 12 |
+
from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
|
| 13 |
+
from llama_index.core.prompts import PromptTemplate
|
| 14 |
import time
|
| 15 |
import sys
|
|
|
|
|
|
|
| 16 |
from config import *
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
REPO_ID = "MrSimple01/AIEXP_RAG_FILES"
|
| 19 |
+
faiss_index_filename = "faiss_index.index"
|
| 20 |
+
chunks_filename = "processed_chunks.csv"
|
| 21 |
+
download_dir = "rag_files"
|
| 22 |
+
table_data_dir = "Табличные данные_JSON"
|
| 23 |
+
HF_TOKEN = os.getenv('HF_TOKEN')
|
| 24 |
+
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
|
| 25 |
+
|
| 26 |
+
CUSTOM_PROMPT_NEW = """
|
| 27 |
+
Вы являетесь высокоспециализированным Ассистентом для анализа документов (AIEXP). Ваша цель - предоставлять точные, корректные и контекстно релевантные ответы на основе анализа нормативной документации (НД). Все ваши ответы должны основываться исключительно на предоставленном контексте без использования внешних знаний или предположений.
|
| 28 |
+
|
| 29 |
+
История чата:
|
| 30 |
+
{chat_history}
|
| 31 |
+
|
| 32 |
+
ОПРЕДЕЛЕНИЕ ТИПА ЗАДАЧИ:
|
| 33 |
+
Проанализируйте запрос пользователя и определите тип задачи:
|
| 34 |
+
|
| 35 |
+
1. КРАТКОЕ САММАРИ (ключевые слова: "кратко", "суммировать", "резюме", "основные моменты", "в двух словах"):
|
| 36 |
+
- Предоставьте структурированное резюме запрашиваемого раздела/пункта
|
| 37 |
+
- Выделите ключевые требования, процедуры или положения
|
| 38 |
+
- Используйте нумерованный список для лучшей читаемости
|
| 39 |
+
- Сохраняйте терминологию НД
|
| 40 |
+
|
| 41 |
+
2. ПОИСК ДОКУМЕНТА И ПУНКТА (ключевые слова: "найти", "где", "какой документ", "в каком разделе", "ссылка"):
|
| 42 |
+
- Укажите конкретный документ и его структурное расположение
|
| 43 |
+
- Предоставьте точные номера разделов/подразделов/пунктов
|
| 44 |
+
- Процитируйте релевантные фрагменты
|
| 45 |
+
- Если найдено несколько документов, перечислите все с указанием специфики каждого
|
| 46 |
+
|
| 47 |
+
3. ПРОВЕРКА КОРРЕКТНОСТИ (ключевые слова: "правильно ли", "соответствует ли", "проверить", "корректно", "нарушение"):
|
| 48 |
+
- Сопоставьте предоставленную информацию с требованиями НД
|
| 49 |
+
- Четко укажите: "СООТВЕТСТВУЕТ" или "НЕ СООТВЕТСТВУЕТ"
|
| 50 |
+
- Перечислите конкретные требования НД
|
| 51 |
+
- Укажите выявленные расхождения или подтвердите соответствие
|
| 52 |
+
- Процитируйте релевантные пункты НД
|
| 53 |
+
|
| 54 |
+
4. ПЛАН ДЕЙСТВИЙ (ключевые слова: "план", "алгоритм", "последовательность", "как действовать", "пошагово"):
|
| 55 |
+
- Создайте пронумерованный пошаговый план
|
| 56 |
+
- Каждый шаг должен содержать ссылку на соответствующий пункт НД
|
| 57 |
+
- Укажите необходимые документы или формы
|
| 58 |
+
- Добавьте временные рамки, если они указаны в НД
|
| 59 |
+
- Выделите критические требования или ограничения
|
| 60 |
+
|
| 61 |
+
ПРАВИЛА ФОРМИРОВАНИЯ ОТВЕТОВ:
|
| 62 |
+
|
| 63 |
+
1. ОБЯЗАТЕЛЬНОЕ УКАЗАНИЕ ИСТОЧНИКОВ:
|
| 64 |
+
- Для контента из конкретного раздела/подраздела:
|
| 65 |
+
"Согласно разделу [X] и подразделу [X.X]: [Ваш ответ]"
|
| 66 |
+
- Для контента вне подразделов (таблицы, рисунки, общие разделы):
|
| 67 |
+
"Согласно [Название документа] - [Номер и наименование пункта/таблицы/рисунка]: [Ваш ответ]"
|
| 68 |
+
- При наличии метаданных о разделе и подразделе - включайте оба
|
| 69 |
+
- При наличии только раздела: "Согласно разделу [X]: [Ваш ответ]"
|
| 70 |
+
|
| 71 |
+
2. СТРОГОЕ СЛЕДОВАНИЕ КОНТЕКСТУ:
|
| 72 |
+
- Если информация не найдена: "Информация по вашему запросу не была найдена в нормативной документации."
|
| 73 |
+
- Не делайте предположений или выводов за пределами предоставленного контекста
|
| 74 |
+
- Не используйте общие знания
|
| 75 |
+
|
| 76 |
+
3. ИСПОЛЬЗОВАНИЕ ТЕРМИНОЛОГИИ НД:
|
| 77 |
+
- Применяйте официальную терминологию из документов
|
| 78 |
+
- Сохраняйте оригинальные формулировки ключевых требований
|
| 79 |
+
- При необходимости разъясняйте специальные термины на основе НД
|
| 80 |
+
|
| 81 |
+
4. СТРУКТУРИРОВАНИЕ ОТВЕТОВ:
|
| 82 |
+
- Для саммари: используйте маркированные или нумерованные списки
|
| 83 |
+
- Для проверки: четкая структура "Требование → Соответствие/Несоответствие"
|
| 84 |
+
- Для планов: пронумерованные шаги с подзадачами при необходимости
|
| 85 |
+
- Для поиска: указание иерархии документа
|
| 86 |
+
|
| 87 |
+
5. ДОПОЛНИТЕЛЬНЫЕ РЕКОМЕНДАЦИИ:
|
| 88 |
+
- При множественных релевантных источниках - укажите все
|
| 89 |
+
- Выделяйте критически важные требования
|
| 90 |
+
- Указывайте альтернативные процедуры, если они предусмотрены НД
|
| 91 |
+
|
| 92 |
+
Контекст: {context_str}
|
| 93 |
+
|
| 94 |
+
Вопрос: {query_str}
|
| 95 |
+
|
| 96 |
+
Ответ:
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
query_engine = None
|
| 100 |
chunks_df = None
|
|
|
|
| 101 |
chat_history = []
|
| 102 |
|
| 103 |
+
def log_message(message):
    """Emit *message* to stdout immediately (unbuffered).

    The explicit flush makes log lines appear promptly even when stdout
    is block-buffered (e.g. inside a hosted container).
    """
    sys.stdout.write(f"{message}\n")
    sys.stdout.flush()
|
| 106 |
+
|
| 107 |
+
def table_to_document(table_json):
    """Convert one table JSON record into a llama_index Document.

    The table is flattened into plain text (title line, description,
    headers, then one line per data row) so it can be embedded and
    retrieved like any other chunk; key identifiers are preserved in
    the Document metadata.
    """
    meta = {
        key: table_json[key]
        for key in ("document_id", "section", "table_number", "table_title")
    }

    header_line = " | ".join(table_json["headers"])
    row_lines = [
        " | ".join(f"{k}: {v}" for k, v in row.items())
        for row in table_json["data"]
    ]

    body = (
        f"Таблица {table_json['table_number']} - {table_json['table_title']}\n"
        f"Описание: {table_json['table_description']}\n"
        f"Заголовки: {header_line}\n"
        + "\n".join(row_lines)
    )

    return Document(text=body, metadata=meta)
|
| 129 |
+
|
| 130 |
+
def download_table_data():
    """Fetch every table JSON file from the HF dataset repo and convert
    each table into a Document via table_to_document().

    Returns:
        list: Documents built from the tables. On a repo-level error an
        empty list is returned so system initialization can proceed
        without table data; per-file errors are logged and skipped.
    """
    log_message("📥 Загрузка табличных данных...")

    # Imported locally: only needed when table data is actually fetched.
    from huggingface_hub import list_repo_files

    table_files = []
    try:
        files = list_repo_files(repo_id=REPO_ID, repo_type="dataset", token=HF_TOKEN)
        for file in files:
            # Keep only JSON files under the table-data directory.
            if file.startswith(table_data_dir) and file.endswith('.json'):
                table_files.append(file)

        log_message(f"📊 Найдено {len(table_files)} JSON файлов с таблицами")

        table_documents = []
        for file_path in table_files:
            try:
                local_path = hf_hub_download(
                    repo_id=REPO_ID,
                    filename=file_path,
                    local_dir=download_dir,
                    repo_type="dataset",
                    token=HF_TOKEN
                )

                with open(local_path, 'r', encoding='utf-8') as f:
                    table_data = json.load(f)

                # A file may hold either one table object or a list of them.
                if isinstance(table_data, list):
                    for table_json in table_data:
                        doc = table_to_document(table_json)
                        table_documents.append(doc)
                else:
                    doc = table_to_document(table_data)
                    table_documents.append(doc)

            except Exception as e:
                # One malformed JSON must not abort the whole download.
                log_message(f"❌ Ошибка обработки файла {file_path}: {str(e)}")
                continue

        log_message(f"✅ Создано {len(table_documents)} документов из таблиц")
        return table_documents

    except Exception as e:
        log_message(f"❌ Ошибка загрузки табличных данных: {str(e)}")
        return []
|
| 176 |
|
| 177 |
+
def format_chat_history():
    """Render the most recent turns of the global chat_history as plain
    text suitable for prompt injection.

    Only the last 5 (user, assistant) pairs are included; when there is
    no history a fixed placeholder phrase is returned.
    """
    if not chat_history:
        return "История чата пуста."

    recent_turns = chat_history[-5:]
    rendered = [
        f"Сообщение {idx}:\nПользователь: {user_msg}\nАссистент: {bot_msg}\n\n"
        for idx, (user_msg, bot_msg) in enumerate(recent_turns, 1)
    ]
    return "".join(rendered)
|
| 186 |
+
|
| 187 |
+
def initialize_models():
    """Download index/chunk data from the HF dataset repo, configure the
    embedding and LLM models, and build the global query_engine.

    Side effects: sets the module globals query_engine and chunks_df and
    mutates llama_index Settings.

    Returns:
        bool: True on success, False on any failure (errors are logged).
    """
    global query_engine, chunks_df

    try:
        log_message("🔄 Инициализация системы...")
        os.makedirs(download_dir, exist_ok=True)

        log_message("📥 Загрузка основных файлов...")
        faiss_index_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=faiss_index_filename,
            local_dir=download_dir,
            repo_type="dataset",
            token=HF_TOKEN
        )

        chunks_csv_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=chunks_filename,
            local_dir=download_dir,
            repo_type="dataset",
            token=HF_TOKEN
        )

        log_message("📚 Загрузка индекса и данных...")
        # NOTE(review): the FAISS index is read here but never used below —
        # the vector index is rebuilt from documents instead. Consider
        # dropping this download or wiring the index into a vector store.
        index_faiss = faiss.read_index(faiss_index_path)
        chunks_df = pd.read_csv(chunks_csv_path)
        log_message(f"📄 Загружено {len(chunks_df)} основных чанков")

        table_documents = download_table_data()

        log_message("🤖 Настройка моделей...")
        embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
        llm = GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)

        Settings.embed_model = embed_model
        Settings.llm = llm

        # Heuristically pick the CSV column that holds the chunk text.
        text_column = None
        for col in chunks_df.columns:
            if 'text' in col.lower() or 'content' in col.lower() or 'chunk' in col.lower():
                text_column = col
                break

        if text_column is None:
            # Fall back to the first column when nothing matched.
            text_column = chunks_df.columns[0]

        log_message("📝 Создание документов из чанков...")
        documents = []
        for i, (_, row) in enumerate(chunks_df.iterrows()):
            doc = Document(
                text=str(row[text_column]),
                metadata={
                    # Row index doubles as a fallback chunk id.
                    "chunk_id": row.get('chunk_id', i),
                    "document_id": row.get('document_id', 'unknown')
                }
            )
            documents.append(doc)

        documents.extend(table_documents)
        log_message(f"📋 Всего создано {len(documents)} документов ({len(chunks_df)} чанков + {len(table_documents)} таблиц)")

        log_message("🔍 Построение векторного индекса...")
        vector_index = VectorStoreIndex.from_documents(documents)

        retriever = VectorIndexRetriever(
            index=vector_index,
            similarity_top_k=20,
            similarity_cutoff=0.7
        )

        custom_prompt_template = PromptTemplate(CUSTOM_PROMPT_NEW)
        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.TREE_SUMMARIZE,
            text_qa_template=custom_prompt_template
        )

        query_engine = RetrieverQueryEngine(
            retriever=retriever,
            response_synthesizer=response_synthesizer
        )

        log_message("✅ Система успешно инициализирована!")
        return True

    except Exception as e:
        log_message(f"❌ Ошибка инициализации: {str(e)}")
        return False
|
| 275 |
|
| 276 |
+
def answer_question(question, history):
    """Answer *question* via the global query engine and update chat state.

    Parameters:
        question: the user's question text.
        history: the Gradio chatbot history (list of [user, bot] pairs).

    Returns:
        (updated history, sources HTML string). On error the exception
        text is appended to the history and an empty string is returned
        for the sources panel.
    """
    global query_engine, chunks_df, chat_history

    if query_engine is None:
        return history + [["", "❌ Система не инициализирована"]], ""

    try:
        start_time = time.time()

        chat_history_text = format_chat_history()

        # Bug fix: the formatted history used to be computed and then
        # discarded, so the LLM never saw prior turns even though the
        # prompt was designed for them. Prepend it (only when history
        # actually exists) so the query carries conversational context.
        if chat_history:
            query_with_history = f"{chat_history_text}\nТекущий вопрос: {question}"
        else:
            query_with_history = question

        response = query_engine.query(query_with_history)
        # Reuse the nodes the engine already retrieved for this query
        # instead of running a second, redundant retrieval pass.
        retrieved_nodes = response.source_nodes

        end_time = time.time()
        processing_time = end_time - start_time

        bot_response = response.response
        chat_history.append((question, bot_response))

        # Bound the stored history to the last 10 turns to cap prompt size.
        if len(chat_history) > 10:
            chat_history = chat_history[-10:]

        sources_html = generate_sources_html(retrieved_nodes)

        response_with_time = f"{bot_response}\n\n⏱️ Время обработки: {processing_time:.2f} сек"

        history.append([question, response_with_time])

        return history, sources_html

    except Exception as e:
        error_msg = f"❌ Ошибка обработки вопроса: {str(e)}"
        history.append([question, error_msg])
        return history, ""
|
| 312 |
|
| 313 |
def generate_sources_html(nodes):
    """Build an HTML panel listing the source documents behind *nodes*.

    Nodes are grouped by their 'document_id' metadata; each document gets
    a card with an optional external link (looked up in the global
    chunks_df, when it has a 'file_link' column) plus up to three table
    references drawn from node metadata.
    """
    html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
    html += "<h3 style='color: #63b3ed; margin-top: 0;'>📚 Источники:</h3>"

    # Group retrieved nodes by source document id.
    unique_docs = {}
    for node in nodes:
        metadata = node.metadata if hasattr(node, 'metadata') else {}
        doc_id = metadata.get('document_id', 'unknown')
        if doc_id not in unique_docs:
            unique_docs[doc_id] = []
        unique_docs[doc_id].append(node)

    for doc_id, doc_nodes in unique_docs.items():
        # Resolve an external link for the document from the chunk table,
        # if one is available.
        file_link = None
        if chunks_df is not None and 'file_link' in chunks_df.columns:
            doc_rows = chunks_df[chunks_df['document_id'] == doc_id]
            if not doc_rows.empty:
                file_link = doc_rows.iloc[0]['file_link']

        html += f"<div style='margin-bottom: 15px; padding: 15px; border: 1px solid #4a5568; border-radius: 8px; background-color: #1a202c;'>"
        html += f"<h4 style='margin: 0 0 10px 0; color: #63b3ed;'>📄 {doc_id}</h4>"

        if file_link:
            html += f"<a href='{file_link}' target='_blank' style='color: #68d391; text-decoration: none; font-size: 14px; display: inline-block; margin-bottom: 10px;'>🔗 Ссылка на документ</a><br>"

        # Show at most three table references per document.
        for node in doc_nodes[:3]:
            metadata = node.metadata if hasattr(node, 'metadata') else {}
            if 'table_number' in metadata:
                html += f"<p style='font-size: 12px; color: #a0aec0; margin: 5px 0;'>📊 Таблица {metadata['table_number']}: {metadata.get('table_title', 'Без названия')}</p>"

        html += "</div>"

    html += "</div>"
    return html
|
| 347 |
|
| 348 |
+
def clear_chat():
    """Reset the module-level chat history.

    Returns:
        tuple: (empty chatbot history, empty sources HTML) — the pair of
        values expected as outputs by the Gradio clear button.
    """
    global chat_history
    chat_history = []
    empty_chatbot, empty_sources = [], ""
    return empty_chatbot, empty_sources
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
+
def create_demo_interface():
|
| 354 |
+
with gr.Blocks(title="AIEXP - AI Expert для нормативной документации", theme=gr.themes.Soft()) as demo:
|
| 355 |
|
| 356 |
gr.Markdown("""
|
| 357 |
# AIEXP - Artificial Intelligence Expert
|
| 358 |
|
| 359 |
+
## Инструмент для работы с нормативной документацией
|
| 360 |
""")
|
| 361 |
|
| 362 |
+
with gr.Tab("💬 Чат с документами"):
|
| 363 |
+
gr.Markdown("### Задайте вопрос по нормативной документации")
|
| 364 |
|
| 365 |
with gr.Row():
|
| 366 |
+
with gr.Column(scale=2):
|
| 367 |
chatbot = gr.Chatbot(
|
| 368 |
+
label="Диалог с AIEXP",
|
| 369 |
+
height=500,
|
| 370 |
+
show_copy_button=True
|
|
|
|
| 371 |
)
|
| 372 |
|
| 373 |
+
with gr.Row():
|
| 374 |
+
msg = gr.Textbox(
|
| 375 |
+
label="Ваш вопрос",
|
| 376 |
+
placeholder="Введите вопрос по нормативным документам...",
|
| 377 |
+
lines=2,
|
| 378 |
+
scale=4
|
| 379 |
+
)
|
| 380 |
+
send_btn = gr.Button("📤 Отправить", variant="primary", scale=1)
|
| 381 |
+
|
| 382 |
+
with gr.Row():
|
| 383 |
+
clear_btn = gr.Button("🗑️ Очистить чат", variant="secondary")
|
| 384 |
|
| 385 |
gr.Examples(
|
| 386 |
examples=[
|
|
|
|
| 388 |
"Кто несет ответственность за организацию и проведение признания протоколов испытаний продукции?",
|
| 389 |
"В каких случаях могут быть признаны протоколы испытаний, проведенные лабораториями, не включенными в перечисления?",
|
| 390 |
],
|
| 391 |
+
inputs=msg
|
| 392 |
)
|
| 393 |
+
|
| 394 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
sources_output = gr.HTML(
|
| 396 |
+
label="Источники",
|
| 397 |
+
value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Здесь появятся источники...</div>",
|
| 398 |
)
|
| 399 |
|
| 400 |
+
def user_message(message, history):
|
| 401 |
+
return "", history + [[message, None]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
+
def bot_message(history):
|
| 404 |
+
if history and history[-1][1] is None:
|
| 405 |
+
user_msg = history[-1][0]
|
| 406 |
+
updated_history, sources = answer_question(user_msg, history[:-1])
|
| 407 |
+
return updated_history, sources
|
| 408 |
+
return history, ""
|
|
|
|
| 409 |
|
| 410 |
+
msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 411 |
+
bot_message, chatbot, [chatbot, sources_output]
|
| 412 |
)
|
|
|
|
|
|
|
|
|
|
| 413 |
|
| 414 |
+
send_btn.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
|
| 415 |
+
bot_message, chatbot, [chatbot, sources_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
)
|
| 417 |
|
| 418 |
+
clear_btn.click(clear_chat, outputs=[chatbot, sources_output])
|
|
|
|
|
|
|
|
|
|
| 419 |
|
| 420 |
return demo
|
| 421 |
|
| 422 |
if __name__ == "__main__":
    # Script entry point: initialize the RAG system, then start the web UI.
    log_message("🚀 Запуск AIEXP - AI Expert для нормативной документации")

    if initialize_models():
        log_message("🌟 Запуск веб-интерфейса...")
        demo = create_demo_interface()
        # Bind on all interfaces on the standard HF Spaces port.
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            debug=False
        )
    else:
        # Without a working index/LLM the app cannot serve answers; exit
        # with a non-zero status so the failure is visible to the host.
        log_message("❌ Невозможно запустить приложение из-за ошибки инициализации")
        sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|