"""Smolagents demo agent: DuckDuckGo search + headline-scraping tools, served via Gradio."""

import datetime
import os
import re

import pytz
import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

from Gradio_UI import GradioUI
from tools.final_answer import FinalAnswerTool

# Prompt templates consumed by the CodeAgent below.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

web_search = DuckDuckGoSearchTool()


# Below is an example of a tool that does nothing. Amaze us with your creativity!
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # it's important to specify the return type
    # Keep this format for the description / args / args description,
    # but feel free to modify the tool.
    """A tool that does nothing yet

    Args:
        arg1: the first argument
        arg2: the second argument
    """
    return "What magic will you build ?"


# Phrases that mark boilerplate/navigation text rather than a real headline.
_NOISE_WORDS = (
    'cookie', 'privacy', 'terms', 'subscribe', 'sign in',
    'login', 'newsletter', 'advertisement', 'sponsored',
)

# Headline candidates whose text starts with one of these are rejected up front.
_EXCLUDE = r'(?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)'


@tool
def visit_webpage(url: str) -> str:
    """Extract the text content (news headlines) of a web page by URL.

    Args:
        url: Address of the web page to read
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive'
        }
        response = requests.get(url, headers=headers, timeout=30)
        # Surface HTTP errors (404/500/...) instead of scraping an error page;
        # the raised exception is turned into the error string below.
        response.raise_for_status()
        content = response.text

        # Patterns for locating headline-bearing tags.
        # NOTE(review): the original patterns began with a bare "]*>" — the
        # opening "<h1[^" / "</[^" fragments were evidently lost to HTML
        # escaping. Reconstructed here; confirm against the original intent.
        patterns = [
            rf'<h1[^>]*>({_EXCLUDE}[^<]+)',
            rf'<h2[^>]*>({_EXCLUDE}[^<]+)',
            rf'<h3[^>]*>({_EXCLUDE}[^<]+)',
            rf'<[^>]*class="[^"]*headline[^"]*"[^>]*>({_EXCLUDE}[^<]+)</[^>]*>',
            rf'<[^>]*class="[^"]*title[^"]*"[^>]*>({_EXCLUDE}[^<]+)</[^>]*>',
        ]

        headlines = []
        for pattern in patterns:
            matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE)
            for match in matches:
                # Strip residual HTML tags and collapse runs of whitespace.
                clean_text = re.sub(r'<[^>]+>', '', match)
                clean_text = re.sub(r'\s+', ' ', clean_text).strip()
                # Keep only plausibly informative headlines: non-empty,
                # reasonable length, and free of boilerplate vocabulary.
                if (clean_text
                        and 20 < len(clean_text) < 200
                        and not any(word in clean_text.lower() for word in _NOISE_WORDS)):
                    headlines.append(clean_text)

        # Deduplicate and sort by length (longer headlines are usually
        # more informative).
        unique_headlines = list(set(headlines))
        unique_headlines.sort(key=len, reverse=True)

        if unique_headlines:
            # Derive a human-readable source name from the URL host part.
            source_name = url.split('/')[2].replace('www.', '')
            return f"Новости с {source_name}:\n" + "\n".join(unique_headlines[:5])
        return f"Не удалось найти новости на {url}"
    except Exception as e:
        # Best-effort tool: report any failure (network, HTTP, malformed URL)
        # as a string the agent can read rather than crashing the run.
        return f"Ошибка при загрузке {url}: {str(e)}"


final_answer = FinalAnswerTool()

model = HfApiModel(
    max_tokens=1048,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
    token=os.environ.get('HF_TOKEN'),  # optional; anonymous access if unset
)

# Create the agent without authorized_imports.
agent = CodeAgent(
    model=model,
    tools=[web_search, visit_webpage, final_answer],
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()