Spaces:
Build error
Build error
| import aiohttp | |
| import asyncio | |
| from bs4 import BeautifulSoup | |
| from urllib.parse import urlparse, parse_qs | |
# Asynchronous request for a page
async def fetch(session, url):
    """Download ``url`` through ``session`` and return the response body as text.

    Args:
        session: Object exposing ``get(url, timeout=...)`` that returns an async
            context manager whose value has an awaitable ``text()`` method
            (elsewhere in this file an ``aiohttp.ClientSession`` is passed).
        url: Address to request.

    Returns:
        The response text, or ``""`` when the request or the body read raised
        an exception.
    """
    import logging  # local import: the file's import block does not provide it

    try:
        async with session.get(url, timeout=10) as response:
            return await response.text()
    except Exception as exc:
        # Best-effort by design: callers treat "" as "no content", so keep
        # returning it, but record the failure instead of discarding it.
        logging.getLogger(__name__).warning("Failed to fetch %s: %s", url, exc)
        return ""
# Asynchronously obtain the text of a page
async def get_page_text(session, url):
    """Fetch ``url`` and return the text content of its ``<body>`` element.

    Args:
        session: Session object forwarded unchanged to ``fetch``.
        url: Address of the page to read.

    Returns:
        The body text with nodes separated by newlines and surrounding
        whitespace stripped, or the fallback message when nothing was fetched
        or the document has no ``<body>``.
    """
    fallback = "Текст не найден"

    markup = await fetch(session, url)
    if not markup:
        return fallback

    body_tag = BeautifulSoup(markup, 'html.parser').find('body')
    if not body_tag:
        return fallback
    return body_tag.get_text(separator='\n', strip=True)
# Asynchronous information search
async def search_info(prompt):
    """Run a Google search for ``prompt`` and return the result target URLs.

    The result page is scanned for ``<h3>`` headings wrapped in an ``<a>``
    whose ``href`` has the path ``/url``; the ``q`` query parameter of that
    href is taken as the target address.

    Args:
        prompt: Free-form search text entered by the user.

    Returns:
        A list of target URLs in page order; empty when the search page could
        not be fetched or no matching links were found.
    """
    from urllib.parse import quote_plus  # local import: not in the file's import block

    # quote_plus maps spaces to '+' (as before) and additionally escapes
    # reserved characters such as '&', '#', '+' and '=', which a plain
    # str.replace left free to truncate or corrupt the query string.
    search_url = f"https://www.google.com/search?q={quote_plus(prompt)}"

    async with aiohttp.ClientSession() as session:
        html = await fetch(session, search_url)
    if not html:
        return []

    soup = BeautifulSoup(html, 'html.parser')
    links = []
    for heading in soup.find_all('h3'):
        anchor = heading.find_parent('a')
        if not anchor or 'href' not in anchor.attrs:
            continue
        parsed = urlparse(anchor['href'])
        if parsed.path != '/url':
            continue
        targets = parse_qs(parsed.query).get('q')
        if targets:
            links.append(targets[0])
    return links
# Main asynchronous routine
async def main():
    """Prompt for a query, search for it, and print the text of the top results.

    Reads the query from standard input, collects result links via
    ``search_info``, downloads at most the first five pages concurrently and
    prints each link followed by its extracted text.
    """
    prompt = input("Введите запрос для поиска: ")

    found_links = await search_info(prompt)
    if not found_links:
        print("Ничего не найдено.")
        return

    # Only the first five links are fetched, to keep the run fast.
    top_links = found_links[:5]
    async with aiohttp.ClientSession() as session:
        page_texts = await asyncio.gather(
            *[get_page_text(session, url) for url in top_links]
        )
        for url, page_text in zip(top_links, page_texts):
            print(f"Ссылка: {url}")
            print(f"Текст: {page_text}\n")
# Program entry point: start the asynchronous workflow only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())