Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
from flask import Flask, render_template, request, Response, jsonify
|
| 2 |
from llama_cpp import Llama
|
| 3 |
-
from duckduckgo_search import DDGS
|
| 4 |
import logging
|
| 5 |
import time
|
| 6 |
import requests
|
|
@@ -17,8 +16,8 @@ app.secret_key = 'super_secret_key'
|
|
| 17 |
message_queue = queue.Queue()
|
| 18 |
|
| 19 |
# Конфигурация модели
|
| 20 |
-
MODEL_REPO = "
|
| 21 |
-
MODEL_FILE = "
|
| 22 |
|
| 23 |
# Загрузка модели при запуске
|
| 24 |
MODEL_PATH = hf_hub_download(
|
|
@@ -51,16 +50,27 @@ SYSTEM_PROMPT = """
|
|
| 51 |
1. Формат ответа строго:
|
| 52 |
**Проблема:** [краткое описание]
|
| 53 |
**Решение:** [пошаговые действия]
|
| 54 |
-
|
| 55 |
-
|
| 56 |
4. Не добавляй дополнительные разделы после "Источники"
|
| 57 |
5. Не дублируй разделы
|
| 58 |
-
6. Будь краток
|
| 59 |
7. Используй точные технические термины
|
| 60 |
8. Указывай артикулы деталей при замене
|
| 61 |
9. Отвечай ТОЛЬКО на русском языке
|
|
|
|
| 62 |
"""
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
logging.basicConfig(
|
| 65 |
level=logging.INFO,
|
| 66 |
format='%(asctime)s - %(levelname)s - %(message)s',
|
|
@@ -163,38 +173,71 @@ def generate_search_query(prompt: str) -> dict:
|
|
| 163 |
}
|
| 164 |
|
| 165 |
def web_search(query: str) -> tuple:
|
| 166 |
-
|
| 167 |
-
try:
|
| 168 |
-
message_queue.put(('log', f"🔍 Поиск в интернете: {query}"))
|
| 169 |
start_time = time.time()
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
combined_content = ""
|
| 181 |
sources = []
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
title = res.get("title", "Без заголовка")
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
-
# Очищаем HTML-теги из сниппетов
|
| 188 |
cleaned_snippet = re.sub(r'<[^>]+>', '', snippet)
|
| 189 |
combined_content += f"[[Источник {i+1}]] {title}\n{cleaned_snippet}\n\n"
|
| 190 |
-
sources.append({"title": title, "url":
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
elapsed = time.time() - start_time
|
| 193 |
-
message_queue.put(('log', f"✅
|
| 194 |
-
return combined_content[:
|
| 195 |
|
| 196 |
except Exception as e:
|
| 197 |
-
error_msg = f"❌
|
| 198 |
message_queue.put(('log', error_msg))
|
| 199 |
return f"Поиск недоступен: {str(e)}", []
|
| 200 |
|
|
@@ -226,29 +269,56 @@ def process_query(prompt: str):
|
|
| 226 |
try:
|
| 227 |
start_time = time.time()
|
| 228 |
message_queue.put(('log', f"👤 Запрос: {prompt}"))
|
|
|
|
| 229 |
|
| 230 |
# Извлекаем данные
|
| 231 |
norm_data = generate_search_query(prompt)
|
| 232 |
-
message_queue.put(('log', f"
|
| 233 |
|
| 234 |
# Выполняем поиск
|
| 235 |
search_data, sources = web_search(norm_data['search_query'])
|
| 236 |
-
message_queue.put(('log', f"📚
|
| 237 |
-
|
| 238 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
sources_text = "\n".join([f"[{i+1}] {s['title']} - {s['url']}" for i, s in enumerate(sources)])
|
| 240 |
-
|
| 241 |
response_prompt = f"""
|
| 242 |
<|system|>{SYSTEM_PROMPT}
|
| 243 |
Контекст:
|
| 244 |
Бренд: {norm_data['brand']}
|
| 245 |
Модель: {norm_data['model']}
|
| 246 |
Ошибка: {norm_data['error_code']}
|
| 247 |
-
|
| 248 |
-
|
| 249 |
Данные поиска:
|
| 250 |
{search_data}
|
| 251 |
-
|
| 252 |
Список источников (для справки, не включай в ответ):
|
| 253 |
{sources_text}
|
| 254 |
</s>
|
|
@@ -256,7 +326,7 @@ def process_query(prompt: str):
|
|
| 256 |
<|assistant|>
|
| 257 |
"""
|
| 258 |
|
| 259 |
-
message_queue.put(('log', "🧠
|
| 260 |
message_queue.put(('response_start', ""))
|
| 261 |
|
| 262 |
# Генерируем ответ
|
|
|
|
| 1 |
from flask import Flask, render_template, request, Response, jsonify
|
| 2 |
from llama_cpp import Llama
|
|
|
|
| 3 |
import logging
|
| 4 |
import time
|
| 5 |
import requests
|
|
|
|
| 16 |
message_queue = queue.Queue()
|
| 17 |
|
| 18 |
# Конфигурация модели
|
| 19 |
+
MODEL_REPO = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
|
| 20 |
+
MODEL_FILE = "Meta-Llama-3-8B-Instruct.Q5_K_M.gguf"
|
| 21 |
|
| 22 |
# Загрузка модели при запуске
|
| 23 |
MODEL_PATH = hf_hub_download(
|
|
|
|
| 50 |
1. Формат ответа строго:
|
| 51 |
**Проблема:** [краткое описание]
|
| 52 |
**Решение:** [пошаговые действия]
|
| 53 |
+
2. В решении включай ВСЕ шаги из найденной информации
|
| 54 |
+
3. Шаги решения нумеруй цифрами с точкой (1., 2., 3.)
|
| 55 |
4. Не добавляй дополнительные разделы после "Источники"
|
| 56 |
5. Не дублируй разделы
|
| 57 |
+
6. Будь краток, но исчерпывающе
|
| 58 |
7. Используй точные технические термины
|
| 59 |
8. Указывай артикулы деталей при замене
|
| 60 |
9. Отвечай ТОЛЬКО на русском языке
|
| 61 |
+
10. Всегда проверяй точность кодов ошибок
|
| 62 |
"""
|
| 63 |
|
| 64 |
+
# Domains whose pages are excluded from search results: web_search() skips
# every organic hit whose link matches one of these entries.
BLACKLISTED_DOMAINS = [
    'reddit.com', 'stackoverflow.com', 'quora.com',  # Q&A / discussion boards
    'facebook.com', 'youtube.com',                   # social / video
    'x.com', 'twitter.com',
]
|
| 73 |
+
|
| 74 |
logging.basicConfig(
|
| 75 |
level=logging.INFO,
|
| 76 |
format='%(asctime)s - %(levelname)s - %(message)s',
|
|
|
|
| 173 |
}
|
| 174 |
|
| 175 |
def web_search(query: str) -> tuple:
    """Run a Google search through SerpAPI and condense the hits for the LLM.

    Progress and failures are reported on ``message_queue`` as
    ``('log', text)`` items.

    Args:
        query: Search string sent to SerpAPI as the ``q`` parameter.

    Returns:
        A ``(content, sources)`` tuple. ``content`` is the featured snippet,
        the knowledge panel description and the organic snippets joined into
        one text and cut to 6000 characters. ``sources`` is a list of
        ``{"title": ..., "url": ...}`` dicts in the same order as the
        ``[[Источник N]]`` labels inside ``content``. On any failure the
        result is ``("Поиск недоступен: <reason>", [])``; nothing is raised.
    """
    import os
    from urllib.parse import quote

    # The key is read from the environment so that it is never committed
    # together with the source code.
    api_key = os.environ.get("SERPAPI_KEY", "")

    try:
        message_queue.put(('log', f"🔍 Провожу поиск по запросу: {query}"))
        start_time = time.time()

        if not api_key:
            raise RuntimeError("не задана переменная окружения SERPAPI_KEY")

        params = {
            "api_key": api_key,
            "engine": "google",
            "q": query,
            "hl": "ru",
            "gl": "ru",
            "num": 10,
            "safe": "off",
        }

        response = requests.get("https://serpapi.com/search", params=params, timeout=15)
        response.raise_for_status()
        data = response.json()

        parts = []
        sources = []

        # --- Featured snippet (Google's direct answer) ---
        featured_snippet = data.get("featured_snippet") or {}
        snippet = featured_snippet.get("snippet", "")
        if snippet:
            parts.append(f"[Автоответ Google]\n{snippet}\n\n")
            sources.append({
                "title": "Google — автоматический ответ",
                "url": f"https://www.google.com/search?q={requests.utils.quote(query)}"
            })

        # --- Knowledge panel (contributes text only, no source entry) ---
        knowledge_panel = data.get("knowledge_panel") or {}
        description = knowledge_panel.get("description", "")
        if description:
            title = knowledge_panel.get("title", "")
            parts.append(f"[Knowledge Panel] {title}\n{description}\n\n")

        # --- Organic results ---
        for res in data.get("organic_results") or []:
            # A missing or null link is treated like the "#" placeholder.
            link = res.get("link") or "#"
            if _is_blacklisted(link, BLACKLISTED_DOMAINS):
                continue

            title = res.get("title", "Без заголовка")
            snippet = res.get("snippet", "") or ""

            # Strip HTML tags from the snippet.
            cleaned_snippet = re.sub(r'<[^>]+>', '', snippet)

            # Number the label by the entry's position in `sources`, so it
            # matches a "[N] title - url" listing built by enumerating
            # `sources` even when some hits were filtered out above.
            parts.append(f"[[Источник {len(sources) + 1}]] {title}\n{cleaned_snippet}\n\n")
            sources.append({"title": title, "url": link})

            # NOTE(review): MAX_RESULTS is not defined in the visible part of
            # the file; it is assumed to be a module-level int - confirm.
            if len(sources) >= MAX_RESULTS:
                break

        combined_content = "".join(parts)

        elapsed = time.time() - start_time
        message_queue.put(('log', f"✅ Поиск был произведен за {elapsed:.2f}с"))
        return combined_content[:6000], sources

    except Exception as e:
        # Best-effort boundary: the caller always gets a tuple back.
        detail = str(e)
        if api_key:
            # HTTP errors raised by requests quote the full request URL,
            # which carries the api_key query parameter; mask it before the
            # text reaches the log queue and the returned content.
            for secret in (api_key, quote(api_key, safe="")):
                detail = detail.replace(secret, "***")
        error_msg = f"❌ SerpAPI ошибка: {detail}"
        message_queue.put(('log', error_msg))
        return f"Поиск недоступен: {detail}", []


def _is_blacklisted(link: str, domains) -> bool:
    """Return True when the host of *link* is one of *domains* or a subdomain of one."""
    from urllib.parse import urlparse

    try:
        host = (urlparse(link).hostname or "").lower()
    except ValueError:
        # Malformed URL: there is no host to compare, so do not filter it.
        return False
    # Compare whole host labels; a plain substring test would also reject
    # unrelated hosts such as "linux.com" because of the "x.com" entry.
    return any(host == domain or host.endswith("." + domain) for domain in domains)
|
| 243 |
|
|
|
|
| 269 |
try:
|
| 270 |
start_time = time.time()
|
| 271 |
message_queue.put(('log', f"👤 Запрос: {prompt}"))
|
| 272 |
+
message_queue.put(('log', f"⚙️ Извлекаю параметры из входящего запроса"))
|
| 273 |
|
| 274 |
# Извлекаем данные
|
| 275 |
norm_data = generate_search_query(prompt)
|
| 276 |
+
message_queue.put(('log', f"⏏️ Извлечено: {json.dumps(norm_data, ensure_ascii=False)}"))
|
| 277 |
|
| 278 |
# Выполняем поиск
|
| 279 |
search_data, sources = web_search(norm_data['search_query'])
|
| 280 |
+
message_queue.put(('log', f"📚 Собрано: {len(search_data)} символов в {len(sources)} источнике(-ах)"))
|
| 281 |
+
|
| 282 |
+
message_queue.put(('log', f"⚙️ Определяю проблему"))
|
| 283 |
+
problem_analysis_prompt = f"""
|
| 284 |
+
<|system|>
|
| 285 |
+
Опиши СУТЬ проблемы в одном предложении.
|
| 286 |
+
Только диагноз, без решений, действий или рекомендаций.
|
| 287 |
+
Не более 12 слов. На русском.
|
| 288 |
+
</s>
|
| 289 |
+
<|user|>
|
| 290 |
+
Запрос пользователя: {prompt}
|
| 291 |
+
Поисковые данные:
|
| 292 |
+
{search_data}
|
| 293 |
+
</s>
|
| 294 |
+
<|assistant|>
|
| 295 |
+
"""
|
| 296 |
+
|
| 297 |
+
problem_response = llm(
|
| 298 |
+
problem_analysis_prompt,
|
| 299 |
+
max_tokens=150,
|
| 300 |
+
temperature=0.2,
|
| 301 |
+
stop=["</s>", "<|user|>"]
|
| 302 |
+
)
|
| 303 |
+
extracted_problem = problem_response['choices'][0]['text'].strip()
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
if not extracted_problem or len(extracted_problem) < 5:
|
| 307 |
+
extracted_problem = f"Неисправность {norm_data['brand']} {norm_data['model']}"
|
| 308 |
+
|
| 309 |
+
message_queue.put(('log', f"🧩 Определённая проблема: {extracted_problem}"))
|
| 310 |
+
|
| 311 |
sources_text = "\n".join([f"[{i+1}] {s['title']} - {s['url']}" for i, s in enumerate(sources)])
|
| 312 |
+
|
| 313 |
response_prompt = f"""
|
| 314 |
<|system|>{SYSTEM_PROMPT}
|
| 315 |
Контекст:
|
| 316 |
Бренд: {norm_data['brand']}
|
| 317 |
Модель: {norm_data['model']}
|
| 318 |
Ошибка: {norm_data['error_code']}
|
| 319 |
+
Суть проблемы (на основе поиска): {extracted_problem}
|
|
|
|
| 320 |
Данные поиска:
|
| 321 |
{search_data}
|
|
|
|
| 322 |
Список источников (для справки, не включай в ответ):
|
| 323 |
{sources_text}
|
| 324 |
</s>
|
|
|
|
| 326 |
<|assistant|>
|
| 327 |
"""
|
| 328 |
|
| 329 |
+
message_queue.put(('log', "🧠 На основе полученных данных генерирую ответ..."))
|
| 330 |
message_queue.put(('response_start', ""))
|
| 331 |
|
| 332 |
# Генерируем ответ
|