Spaces:

KennyOry
/

PrintAI

Sleeping

App Files Community

KennyOry committed on Aug 14, 2025

Commit

b25aae4

verified ·

1 Parent(s): 49bab01

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -69

app.py CHANGED Viewed

@@ -26,7 +26,6 @@ MAX_CONTENT_LENGTH = 10000  # Максимальная длина контент
 # Новый клиент Mistral
 mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
 SYSTEM_PROMPT = """
 Ты PrintMaster, сервисный инженер по печатной технике. Критически важные правила:
 1. Формат ответа СТРОГО:
@@ -40,10 +39,16 @@ SYSTEM_PROMPT = """
    **Примечания:**
    - Пункт 1
    - Пункт 2
 ЖЕСТКИЕ ЗАПРЕТЫ:
 - Никогда не используй подзаголовки с ###
 - Никогда не добавляй разделы "Удалены шаги" или подобные
 - Начинай сразу с **Проблема:** без преамбул
 """
 BLACKLISTED_DOMAINS = [
@@ -204,7 +209,6 @@ def web_search(query: str) -> tuple:
         response.raise_for_status()
         data = response.json()
-        combined_content = ""
         sources = []
         full_contents = []
@@ -213,7 +217,6 @@ def web_search(query: str) -> tuple:
         if featured_snippet:
             snippet = featured_snippet.get("snippet", "")
             if snippet:
-                combined_content += f"[Автоответ Google]\n{snippet}\n\n"
                 sources.insert(0, {
                     "title": "Google — автоматический ответ",
                     "url": f"https://www.google.com/search?q={requests.utils.quote(query)}",
@@ -222,7 +225,7 @@ def web_search(query: str) -> tuple:
         # Обработка organic results
         organic_results = data.get("organic_results", [])
-        for i, res in enumerate(organic_results[:5]):  # Ограничиваемся топ-5
             title = res.get("title", "Без заголовка")
             link = res.get("link", "#")
             snippet = res.get("snippet", "") or ""
@@ -245,7 +248,6 @@ def web_search(query: str) -> tuple:
             # Форматирование контента
             cleaned_content = re.sub(r'\s+', ' ', content).strip()
-            combined_content += f"[[Источник {i+1}]] {title}\n{cleaned_content}\n\n"
             # Сохранение источника
             source_data = {
@@ -258,15 +260,15 @@ def web_search(query: str) -> tuple:
         elapsed = time.time() - start_time
         message_queue.put(('log', f"✅ Поиск был произведен за {elapsed:.2f}с. Найдено {len(sources)} источников."))
-        return combined_content[:20000], sources
     except Exception as e:
         error_msg = f"❌ SerpAPI ошибка: {str(e)}"
         message_queue.put(('log', error_msg))
-        return f"Поиск недоступен: {str(e)}", []
-def clean_response(response: str, sources: list) -> str:
     # Удаление служебных тегов
     response = re.sub(r'</?assistant>|<\|system\|>|</s>', '', response, flags=re.IGNORECASE)
@@ -298,52 +300,6 @@ def clean_response(response: str, sources: list) -> str:
     return response.strip()
-def verify_with_sources(response: str, sources: list) -> str:
-    """Проверяет соответствие ответа источникам с помощью LLM"""
-    try:
-        message_queue.put(('log', "🔍 Проверяю соответствие ответа источникам..."))
-        sources_text = "\n\n".join([
-            f"Источник {i+1} ({source['title']}):\n{source['content'][:1500]}"
-            for i, source in enumerate(sources)
-        ])
-        verification_prompt = f"""
-        Проверь соответствие решения источникам:
-        ### Ответ бота:
-        {response}
-        ### Источники:
-        {sources_text}
-        Правила проверки:
-        1. Все шаги решения должны иметь подтверждение в источниках
-        2. Детали замены должны точно соответствовать артикулам из источников
-        3. Если в ответе есть шаги не из источников - удали их
-        4. Если есть противоречия между источниками - укажи это в решении
-        5. Если ошибки в кодах ошибок - исправь
-        6. Сохрани оригинальную структуру ответа
-        Верни исправленный ответ.
-        """
-        verification = mistral_client.chat.complete(
-            model=MISTRAL_MODEL,
-            messages=[{"role": "user", "content": verification_prompt}],
-            max_tokens=2048,
-            temperature=0.1
-        )
-        verified_response = verification.choices[0].message.content
-        return verified_response.strip()
-    except Exception as e:
-        error_msg = f"❌ Ошибка верификации: {str(e)}"
-        message_queue.put(('log', error_msg))
-        return response
 def process_query(prompt: str):
     try:
         start_time = time.time()
@@ -354,16 +310,23 @@ def process_query(prompt: str):
         message_queue.put(('log', f"⏏️ Извлечено: {json.dumps(norm_data, ensure_ascii=False)}"))
         search_query = norm_data['search_query']
-        search_data, sources = web_search(search_query)
-        message_queue.put(('log', f"📚 Собрано: {len(search_data)} символов в {len(sources)} источнике(-ах)"))
         message_queue.put(('log', f"⚙️ Определяю проблему"))
         problem_response = mistral_client.chat.complete(
             model=MISTRAL_MODEL,
             messages=[
                 {"role": "system", "content": "Опиши СУТЬ проблемы в одном предложении. Только диагноз, без решений. Не более 12 слов. На русском."},
-                {"role": "user", "content": f"Запрос пользователя: {prompt}\nПоисковые данные:\n{search_data}"}
             ],
             max_tokens=150,
             temperature=0.2
@@ -375,22 +338,21 @@ def process_query(prompt: str):
         message_queue.put(('log', f"🧩 Определённая проблема: {extracted_problem}"))
-        sources_text = "\n".join([f"[{i+1}] {s['title']} - {s['url']}" for i, s in enumerate(sources)])
         messages = [
             {"role": "system", "content": SYSTEM_PROMPT + f"""
             Контекст:
             Бренд: {norm_data['brand']}
             Модель: {norm_data['model']}
             Ошибка: {norm_data['error_code']}
-            Суть проблемы (на основе поиска): {extracted_problem}
-            Данные поиска:
-            {search_data}
             """},
             {"role": "user", "content": f"Проблема: {prompt}"}
         ]
-        message_queue.put(('log', "🧠 На основе полученных данных генерирую ответ..."))
         message_queue.put(('response_start', ""))
         full_response = ""
@@ -405,18 +367,20 @@ def process_query(prompt: str):
                 full_response += chunk_text
                 message_queue.put(('response_chunk', chunk_text))
-        # Проверка соответствия источникам
-        verified_response = verify_with_sources(full_response, sources)
         # Очистка и форматирование ответа
-        final_response = clean_response(verified_response, sources)
         message_queue.put(('response_end', final_response))
         message_queue.put(('sources', json.dumps(sources)))
         total_time = time.time() - start_time
-        message_queue.put(('log', f"💡 Ответ: {final_response[:200]}..."))
-        message_queue.put(('log', f"⏱ Время: {total_time:.1f}с"))
         message_queue.put(('done', ''))
     except Exception as e:

 # Новый клиент Mistral
 mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
 SYSTEM_PROMPT = """
 Ты PrintMaster, сервисный инженер по печатной технике. Критически важные правила:
 1. Формат ответа СТРОГО:
    **Примечания:**
    - Пункт 1
    - Пункт 2
+4. Источники ТОЛЬКО в конце:
+   **Источники информации:**
+   - [Краткое описание источника 1]
+   - [Краткое описание источника 2]
 ЖЕСТКИЕ ЗАПРЕТЫ:
 - Никогда не используй подзаголовки с ###
 - Никогда не добавляй разделы "Удалены шаги" или подобные
+- Никогда не используй ссылки в формате [[Источник 1]]
 - Начинай сразу с **Проблема:** без преамбул
+- Всегда основывай решение ТОЛЬКО на предоставленных источниках
 """
 BLACKLISTED_DOMAINS = [
         response.raise_for_status()
         data = response.json()
         sources = []
         full_contents = []
         if featured_snippet:
             snippet = featured_snippet.get("snippet", "")
             if snippet:
                 sources.insert(0, {
                     "title": "Google — автоматический ответ",
                     "url": f"https://www.google.com/search?q={requests.utils.quote(query)}",
         # Обработка organic results
         organic_results = data.get("organic_results", [])
+        for i, res in enumerate(organic_results[:MAX_RESULTS]):  # Ограничиваемся топ-результатами
             title = res.get("title", "Без заголовка")
             link = res.get("link", "#")
             snippet = res.get("snippet", "") or ""
             # Форматирование контента
             cleaned_content = re.sub(r'\s+', ' ', content).strip()
             # Сохранение источника
             source_data = {
         elapsed = time.time() - start_time
         message_queue.put(('log', f"✅ Поиск был произведен за {elapsed:.2f}с. Найдено {len(sources)} источников."))
+        return sources
     except Exception as e:
         error_msg = f"❌ SerpAPI ошибка: {str(e)}"
         message_queue.put(('log', error_msg))
+        return []
+def clean_response(response: str) -> str:
     # Удаление служебных тегов
     response = re.sub(r'</?assistant>|<\|system\|>|</s>', '', response, flags=re.IGNORECASE)
     return response.strip()
 def process_query(prompt: str):
     try:
         start_time = time.time()
         message_queue.put(('log', f"⏏️ Извлечено: {json.dumps(norm_data, ensure_ascii=False)}"))
         search_query = norm_data['search_query']
+        sources = web_search(search_query)
+        message_queue.put(('log', f"📚 Найдено {len(sources)} источников"))
+        # Формируем контекст для LLM
+        context_content = ""
+        for i, source in enumerate(sources):
+            context_content += f"[[Источник {i+1}]] {source['title']}\n{source['content']}\n\n"
+        context_content = context_content.strip()
         message_queue.put(('log', f"⚙️ Определяю проблему"))
         problem_response = mistral_client.chat.complete(
             model=MISTRAL_MODEL,
             messages=[
                 {"role": "system", "content": "Опиши СУТЬ проблемы в одном предложении. Только диагноз, без решений. Не более 12 слов. На русском."},
+                {"role": "user", "content": f"Запрос пользователя: {prompt}\nДанные из источников:\n{context_content}"}
             ],
             max_tokens=150,
             temperature=0.2
         message_queue.put(('log', f"🧩 Определённая проблема: {extracted_problem}"))
+        # Формируем промпт с источниками
         messages = [
             {"role": "system", "content": SYSTEM_PROMPT + f"""
             Контекст:
             Бренд: {norm_data['brand']}
             Модель: {norm_data['model']}
             Ошибка: {norm_data['error_code']}
+            Суть проблемы (на основе источников): {extracted_problem}
+            Данные из источников:
+            {context_content}
             """},
             {"role": "user", "content": f"Проблема: {prompt}"}
         ]
+        message_queue.put(('log', "🧠 Генерирую ответ на основе источников..."))
         message_queue.put(('response_start', ""))
         full_response = ""
                 full_response += chunk_text
                 message_queue.put(('response_chunk', chunk_text))
         # Очистка и форматирование ответа
+        final_response = clean_response(full_response)
+        # Добавляем источники в ответ
+        if sources:
+            final_response += "\n\n**Источники информации:**\n"
+            for i, source in enumerate(sources):
+                final_response += f"- [{source['title']}]({source['url']})\n"
         message_queue.put(('response_end', final_response))
         message_queue.put(('sources', json.dumps(sources)))
         total_time = time.time() - start_time
+        message_queue.put(('log', f"💡 Ответ сгенерирован за {total_time:.1f}с"))
         message_queue.put(('done', ''))
     except Exception as e: