liberal committed on
Update README.md
Browse files
README.md
CHANGED
|
@@ -758,4 +758,516 @@ def main():
|
|
| 758 |
print("\n👋 Система завершена.")
|
| 759 |
|
| 760 |
if __name__ == "__main__":
|
| 761 |
-
main() ``` </pre>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
print("\n👋 Система завершена.")
|
| 759 |
|
| 760 |
if __name__ == "__main__":
|
| 761 |
+
main() ``` </pre>
|
| 762 |
+
|
| 763 |
+
# A Deep Research system developed specifically for our model's agent system
|
| 764 |
+
<pre> ```import asyncio
|
| 765 |
+
import aiohttp
|
| 766 |
+
import time
|
| 767 |
+
import json
|
| 768 |
+
from typing import List, Dict, Any, Optional
|
| 769 |
+
from dataclasses import dataclass
|
| 770 |
+
from urllib.parse import urlencode, urlparse
|
| 771 |
+
import re
|
| 772 |
+
from bs4 import BeautifulSoup
|
| 773 |
+
import logging
|
| 774 |
+
|
| 775 |
+
# Logging setup: INFO level for the whole script; one module-level logger
# shared by every class/function below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
| 778 |
+
|
| 779 |
+
@dataclass
class SearchQuery:
    """A single targeted search query within a larger search plan."""

    query: str               # the literal search-engine query string
    purpose: str             # human-readable reason this query exists
    priority: int            # importance ranking (higher = run/keep first)
    expected_results: int = 3  # how many hits to examine for this query
|
| 786 |
+
|
| 787 |
+
@dataclass
class WebResult:
    """One web-search hit, optionally enriched with full page content."""

    url: str                     # page address
    title: str                   # result title as scraped from the engine
    snippet: str                 # short engine-provided description
    content: str = ""            # full page text, filled in after fetching
    relevance_score: float = 0.0  # simple ranking score (set post-fetch)
    source_type: str = "web"     # originating engine, e.g. "DuckDuckGo"
|
| 796 |
+
|
| 797 |
+
@dataclass
class SearchPlan:
    """A complete search strategy: the user's question plus its sub-queries."""

    main_query: str                 # the original user question
    sub_queries: List[SearchQuery]  # prioritized, targeted queries to run
    expected_outcome: str           # what a successful search should yield
    search_strategy: str            # description of the overall approach
|
| 804 |
+
|
| 805 |
+
class IntelligentWebSearchSystem:
    """Plan-driven web search system.

    Builds a multi-query search plan from a user question, runs the queries
    against several search engines concurrently, downloads the content of the
    resulting pages, ranks them by a simple relevance heuristic, and renders a
    human-readable report.

    Must be used as an async context manager so the shared aiohttp session is
    created and closed properly::

        async with IntelligentWebSearchSystem() as system:
            plan = system.create_search_plan("...")
            data = await system.execute_search_plan(plan)
            print(system.format_search_results(data))
    """

    def __init__(self):
        # The aiohttp session is created lazily in __aenter__ so this object
        # can be constructed outside a running event loop.
        self.session = None
        # Known engine endpoints (informational; the search_* methods build
        # their own request URLs).
        self.search_engines = {
            'duckduckgo': 'https://duckduckgo.com/html/?q=',
            'bing': 'https://www.bing.com/search?q=',
            'google': 'https://www.google.com/search?q='
        }

        # Meta-prompt for an LLM-based search planner.  NOTE: not consumed by
        # the rule-based _generate_search_plan below; kept for integration
        # with a real AI planner.
        self.planning_prompt = """You are an Expert Web Search Planner. Your mission is to create comprehensive search strategies for any user query.

CRITICAL INSTRUCTIONS:
- Always respond in the SAME LANGUAGE as the user's query (Russian/English/etc.)
- Create detailed search plans with multiple targeted queries
- Focus on gathering comprehensive information from diverse sources
- Prioritize queries by importance and relevance

PLANNING METHODOLOGY:
1. Analyze the user's query to understand:
   - Core information needs
   - Context and background requirements
   - Specific details needed
   - Current/recent information requirements

2. Create a strategic search plan with:
   - 8-10 targeted search queries
   - Clear purpose for each query
   - Priority ranking (1-10)
   - Expected number of results to examine

3. Search strategy should cover:
   - Direct answers to the main question
   - Background and context information
   - Recent developments and news
   - Technical details and specifications
   - Alternative perspectives and opinions
   - Related concepts and comparisons

4. Query formulation best practices:
   - Use specific keywords and phrases
   - Include relevant technical terms
   - Consider different phrasings of the same concept
   - Add date constraints for recent information
   - Include source-specific searches when relevant

RESPONSE FORMAT:
Provide a JSON-like structure with:
- main_query: The original user query
- expected_outcome: What comprehensive answer should be achieved
- search_strategy: Overall approach description
- sub_queries: List of targeted search queries with purpose and priority

Example structure:
{
  "main_query": "user's original question",
  "expected_outcome": "comprehensive answer covering all aspects",
  "search_strategy": "multi-faceted approach covering X, Y, Z",
  "sub_queries": [
    {
      "query": "specific search terms",
      "purpose": "what this search aims to find",
      "priority": 9,
      "expected_results": 5
    }
  ]
}"""

    async def __aenter__(self):
        """Create the shared HTTP session (async context-manager entry)."""
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=30),
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the shared HTTP session (async context-manager exit)."""
        if self.session:
            await self.session.close()

    def create_search_plan(self, user_query: str) -> SearchPlan:
        """Build and return a SearchPlan for *user_query*, logging progress."""
        print(f"🧠 Создание плана поиска для: {user_query}")

        # Rule-based plan; a production system would delegate to an AI
        # planner driven by self.planning_prompt.
        plan = self._generate_search_plan(user_query)

        print(f"📋 План создан: {len(plan.sub_queries)} поисковых запросов")
        return plan

    def _generate_search_plan(self, user_query: str) -> SearchPlan:
        """Generate a search plan (simplified, keyword-rule based).

        Always includes the verbatim query, then adds context-specific
        variants (definitions, how-tos), recency queries, and alternative
        phrasings; keeps the 10 highest-priority queries.
        """
        query_lower = user_query.lower()

        # The verbatim query is always first and highest priority.
        sub_queries = [
            SearchQuery(
                query=user_query,
                purpose="Прямой ответ на основной вопрос",
                priority=10,
                expected_results=5
            )
        ]

        # "What is ..." style questions: add definition/example queries.
        if any(word in query_lower for word in ['что такое', 'что это', 'определение']):
            sub_queries.extend([
                SearchQuery(
                    query=f"{user_query} определение",
                    purpose="Получение точного определения",
                    priority=9,
                    expected_results=3
                ),
                SearchQuery(
                    query=f"{user_query} примеры",
                    purpose="Практические примеры",
                    priority=7,
                    expected_results=3
                )
            ])

        # "How to ..." style questions: add instruction/advice queries.
        if any(word in query_lower for word in ['как', 'способ', 'метод']):
            sub_queries.extend([
                SearchQuery(
                    query=f"{user_query} инструкция",
                    purpose="Пошаговые инструкции",
                    priority=9,
                    expected_results=4
                ),
                SearchQuery(
                    query=f"{user_query} советы",
                    purpose="Практические советы",
                    priority=8,
                    expected_results=3
                )
            ])

        # Recency-oriented queries (news, current-year results, reviews).
        sub_queries.extend([
            SearchQuery(
                query=f"{user_query} 2024 2025",
                purpose="Актуальная информация",
                priority=8,
                expected_results=3
            ),
            SearchQuery(
                query=f"{user_query} новости",
                purpose="Последние новости и развития",
                priority=7,
                expected_results=3
            ),
            SearchQuery(
                query=f"{user_query} обзор",
                purpose="Аналитические обзоры",
                priority=6,
                expected_results=3
            )
        ])

        # Alternative phrasings for breadth of coverage.
        sub_queries.extend([
            SearchQuery(
                query=f"{user_query} подробно",
                purpose="Детальная информация",
                priority=6,
                expected_results=3
            ),
            SearchQuery(
                query=f"{user_query} преимущества недостатки",
                purpose="Анализ плюсов и минусов",
                priority=5,
                expected_results=3
            ),
            SearchQuery(
                query=f"{user_query} сравнение",
                purpose="Сравнительный анализ",
                priority=5,
                expected_results=2
            )
        ])

        # Cap at the 10 highest-priority queries.
        sub_queries = sorted(sub_queries, key=lambda x: x.priority, reverse=True)[:10]

        return SearchPlan(
            main_query=user_query,
            sub_queries=sub_queries,
            expected_outcome=f"Comprehensive information about: {user_query}",
            search_strategy="Multi-faceted search covering definitions, examples, recent developments, and practical applications"
        )

    async def search_duckduckgo(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search DuckDuckGo's HTML endpoint and scrape the result list.

        Returns up to *max_results* dicts with keys 'title', 'url',
        'snippet', 'source'; an empty list on HTTP errors or exceptions.
        """
        try:
            # BUGFIX: urlencode({'q': query}) already produces "q=...", so
            # the original f"...?q={urlencode({'q': query})}" built a broken
            # URL of the form "?q=q=...".
            search_url = f"https://duckduckgo.com/html/?{urlencode({'q': query})}"

            async with self.session.get(search_url) as response:
                if response.status != 200:
                    # BUGFIX: previously fell through and implicitly
                    # returned None instead of an empty list.
                    return []

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                results = []
                for result in soup.find_all('div', class_='result')[:max_results]:
                    title_elem = result.find('h2')
                    snippet_elem = result.find('div', class_='result__snippet')
                    link_elem = result.find('a', class_='result__a')

                    if title_elem and link_elem:
                        results.append({
                            'title': title_elem.get_text(strip=True),
                            'url': link_elem.get('href', ''),
                            'snippet': snippet_elem.get_text(strip=True) if snippet_elem else '',
                            'source': 'DuckDuckGo'
                        })

                return results

        except Exception as e:
            logger.error(f"Error searching DuckDuckGo: {e}")
            return []

    async def search_bing(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search Bing (simplified scraper); same result shape as DuckDuckGo.

        Returns an empty list on HTTP errors or exceptions.
        """
        try:
            # BUGFIX: same double-"q=" URL construction bug as DuckDuckGo.
            search_url = f"https://www.bing.com/search?{urlencode({'q': query})}"

            async with self.session.get(search_url) as response:
                if response.status != 200:
                    # BUGFIX: explicit empty list instead of implicit None.
                    return []

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                results = []
                for result in soup.find_all('li', class_='b_algo')[:max_results]:
                    title_elem = result.find('h2')
                    snippet_elem = result.find('div', class_='b_caption')
                    link_elem = title_elem.find('a') if title_elem else None

                    if title_elem and link_elem:
                        results.append({
                            'title': title_elem.get_text(strip=True),
                            'url': link_elem.get('href', ''),
                            'snippet': snippet_elem.get_text(strip=True) if snippet_elem else '',
                            'source': 'Bing'
                        })

                return results

        except Exception as e:
            logger.error(f"Error searching Bing: {e}")
            return []

    async def fetch_webpage_content(self, url: str, max_length: int = 5000) -> str:
        """Fetch *url* and return up to *max_length* chars of its visible text.

        Scripts and styles are stripped; whitespace is collapsed.  Returns an
        empty string on HTTP errors or exceptions.
        """
        try:
            async with self.session.get(url) as response:
                if response.status != 200:
                    # BUGFIX: explicit "" instead of implicit None, which
                    # previously made len(content) raise downstream.
                    return ""

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                # Drop non-content elements.
                for script in soup(["script", "style"]):
                    script.decompose()

                # Extract and normalize the visible text.
                text = soup.get_text()
                lines = (line.strip() for line in text.splitlines())
                chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
                text = ' '.join(chunk for chunk in chunks if chunk)

                return text[:max_length]

        except Exception as e:
            logger.error(f"Error fetching {url}: {e}")
            return ""

    async def execute_search_query(self, search_query: SearchQuery) -> List[WebResult]:
        """Run one SearchQuery against all engines; return deduplicated hits."""
        print(f"🔍 Поиск: {search_query.query} (приоритет: {search_query.priority})")

        # Query both engines concurrently.
        tasks = [
            self.search_duckduckgo(search_query.query, search_query.expected_results),
            self.search_bing(search_query.query, search_query.expected_results)
        ]

        # return_exceptions=True lets one engine fail without losing the other.
        search_results = await asyncio.gather(*tasks, return_exceptions=True)

        # Merge the per-engine result lists, skipping raised exceptions.
        all_results = []
        for results in search_results:
            if isinstance(results, list):
                all_results.extend(results)

        # Deduplicate by URL, keeping first occurrence (insertion order).
        unique_results = {}
        for result in all_results:
            url = result.get('url', '')
            if url and url not in unique_results:
                unique_results[url] = result

        # Convert to WebResult objects, capped at the query's expected count.
        web_results = []
        for result in list(unique_results.values())[:search_query.expected_results]:
            web_result = WebResult(
                url=result['url'],
                title=result['title'],
                snippet=result['snippet'],
                source_type=result.get('source', 'web')
            )
            web_results.append(web_result)

        print(f"✅ Найдено {len(web_results)} результатов для: {search_query.query}")
        return web_results

    async def fetch_detailed_content(self, web_results: List[WebResult]) -> List[WebResult]:
        """Download full page text for each result, in place; return the list.

        Results whose fetch fails fall back to their snippet with a minimal
        relevance score.
        """
        print(f"📄 Загрузка содержимого {len(web_results)} страниц...")

        # create_task starts all fetches immediately; they run concurrently
        # even though we await them one by one below.
        tasks = []
        for result in web_results:
            task = asyncio.create_task(
                self.fetch_webpage_content(result.url),
                name=f"fetch_{result.url}"
            )
            tasks.append((result, task))

        for result, task in tasks:
            try:
                content = await task
                result.content = content
                # Crude relevance heuristic: longer page text scores higher.
                result.relevance_score = len(content) / 1000
                print(f"✅ Загружено: {result.title[:50]}...")
            except Exception as e:
                logger.error(f"Error loading content for {result.url}: {e}")
                result.content = result.snippet
                result.relevance_score = 0.1

        return web_results

    async def execute_search_plan(self, plan: SearchPlan) -> Dict[str, Any]:
        """Execute every query of *plan* concurrently and collect statistics.

        Returns a dict with keys 'plan', 'results' (relevance-sorted
        WebResults), 'total_results', 'execution_time', 'queries_executed'.
        """
        print(f"\n🚀 Выполнение плана поиска для: {plan.main_query}")
        print(f"📊 Запросов в плане: {len(plan.sub_queries)}")
        print("="*60)

        start_time = time.time()

        # Launch every search query as its own task (runs concurrently).
        search_tasks = []
        for query in plan.sub_queries:
            task = asyncio.create_task(
                self.execute_search_query(query),
                name=f"search_{query.query}"
            )
            search_tasks.append((query, task))

        # Collect results; a failed query is logged and skipped.
        all_results = []
        for query, task in search_tasks:
            try:
                results = await task
                all_results.extend(results)
            except Exception as e:
                logger.error(f"Error executing search query '{query.query}': {e}")

        print(f"\n📊 Собрано {len(all_results)} результатов поиска")

        # Enrich hits with full page content.
        detailed_results = await self.fetch_detailed_content(all_results)

        # Best (longest-content) pages first.
        detailed_results.sort(key=lambda x: x.relevance_score, reverse=True)

        end_time = time.time()

        return {
            'plan': plan,
            'results': detailed_results,
            'total_results': len(detailed_results),
            'execution_time': end_time - start_time,
            'queries_executed': len(plan.sub_queries)
        }

    def format_search_results(self, search_data: Dict[str, Any]) -> str:
        """Render the dict from execute_search_plan as a human-readable report."""
        plan = search_data['plan']
        results = search_data['results']

        output = f"""
🎯 РЕЗУЛЬТАТЫ ИНТЕЛЛЕКТУАЛЬНОГО ПОИСКА
{'='*60}

📝 ИСХОДНЫЙ ЗАПРОС: {plan.main_query}
🎯 ЦЕЛЬ ПОИСКА: {plan.expected_outcome}
📊 СТРАТЕГИЯ: {plan.search_strategy}

📈 СТАТИСТИКА:
• Выполнено запросов: {search_data['queries_executed']}
• Найдено результатов: {search_data['total_results']}
• Время выполнения: {search_data['execution_time']:.2f} секунд

🔍 ВЫПОЛНЕННЫЕ ЗАПРОСЫ:
"""

        for i, query in enumerate(plan.sub_queries, 1):
            output += f"  {i}. {query.query} (приоритет: {query.priority}) - {query.purpose}\n"

        output += f"\n📋 ТОП-10 НАИБОЛЕЕ РЕЛЕВАНТНЫХ РЕЗУЛЬТАТОВ:\n{'-'*60}\n"

        for i, result in enumerate(results[:10], 1):
            # Clip very long page text for display.
            content_preview = result.content[:300] + "..." if len(result.content) > 300 else result.content
            output += f"""
{i}. 📄 {result.title}
   🌐 URL: {result.url}
   📊 Релевантность: {result.relevance_score:.2f}
   📝 Краткое описание: {result.snippet}
   📖 Содержимое: {content_preview}
{'-'*40}
"""

        return output
|
| 1232 |
+
|
| 1233 |
+
async def main():
    """Interactive entry point: read queries in a loop, build and execute a
    search plan for each, and print the formatted report.

    Exits on 'exit'/'quit' input or Ctrl-C; other exceptions are reported
    and the loop continues.
    """
    print("🌐 Система интеллектуального поиска в интернете")
    print("="*60)
    print("💡 Система создает план поиска и выполняет 10 запросов параллельно")
    print("🔍 Каждый запрос обрабатывается в нескольких поисковых системах")
    print("📄 Автоматически загружается содержимое найденных страниц")
    print("="*60)

    # Context manager opens/closes the shared aiohttp session.
    async with IntelligentWebSearchSystem() as search_system:
        while True:
            try:
                # NOTE(review): input() blocks the event loop; tolerable here
                # since no background tasks run while waiting for the user.
                user_query = input("\n🔍 Введите запрос для поиска (или 'exit' для выхода): ").strip()

                if user_query.lower() in ['exit', 'quit']:
                    print("👋 Завершение работы...")
                    break

                if not user_query:
                    print("⚠️ Пожалуйста, введите непустой запрос.")
                    continue

                # Build the search plan for this query.
                plan = search_system.create_search_plan(user_query)

                # Execute all planned sub-queries.
                search_results = await search_system.execute_search_plan(plan)

                # Render and display the report.
                formatted_results = search_system.format_search_results(search_results)
                print(formatted_results)

            except KeyboardInterrupt:
                # Ctrl-C ends the session cleanly.
                print("\n\n❌ Прервано пользователем.")
                break
            except Exception as e:
                # Report but keep the interactive loop alive.
                print(f"❌ Ошибка: {e}")
                logger.error(f"Unexpected error: {e}")
|
| 1271 |
+
|
| 1272 |
+
if __name__ == "__main__":
    # Script entry point: run the interactive search loop on a fresh event loop.
    asyncio.run(main())
|