# Hugging Face Space app (scrape residue "Spaces: Sleeping" status banner removed)
# --- Imports (grouped per PEP 8: stdlib / third-party / local) -------------
import datetime
import os
import re

import pytz
import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

from Gradio_UI import GradioUI
from tools.final_answer import FinalAnswerTool

# --- Module-level setup ----------------------------------------------------
# Load the agent's prompt templates shipped with the Space.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Built-in DuckDuckGo search tool exposed to the agent.
web_search = DuckDuckGoSearchTool()
# Below is an example of a tool that does nothing. Amaze us with your creativity !
@tool  # fix: `tool` was imported but never applied; smolagents needs it to register the function as a Tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # it's important to specify the return type
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that does nothing yet

    Args:
        arg1: the first argument
        arg2: the second argument
    """
    return "What magic will you build ?"
@tool  # fix: this function is passed to CodeAgent's tools list, which expects Tool instances, not bare functions
def visit_webpage(url: str) -> str:
    """Extracts news-headline text from a web page at the given URL.

    Args:
        url: Address of the web page to read
    """
    try:
        # Browser-like headers so news sites are less likely to block the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive'
        }
        response = requests.get(url, headers=headers, timeout=30)
        # Fix: fail fast on HTTP errors instead of scraping a 4xx/5xx error page.
        response.raise_for_status()
        content = response.text
        # Headline patterns; the negative lookahead filters common boilerplate (nav, legal, signup).
        patterns = [
            r'<h1[^>]*>((?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)[^<]+)</h1>',
            r'<h2[^>]*>((?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)[^<]+)</h2>',
            r'<h3[^>]*>((?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)[^<]+)</h3>',
            r'class="[^"]*headline[^"]*"[^>]*>((?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)[^<]+)</[^>]*>',
            r'class="[^"]*title[^"]*"[^>]*>((?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)[^<]+)</[^>]*>'
        ]
        headlines = []
        for pattern in patterns:
            matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE)
            for match in matches:
                # Strip residual HTML tags and collapse whitespace.
                clean_text = re.sub(r'<[^>]+>', '', match)
                clean_text = re.sub(r'\s+', ' ', clean_text).strip()
                # Keep only plausibly informative headlines (length bounds + boilerplate word filter).
                if (clean_text and
                    len(clean_text) > 20 and
                    len(clean_text) < 200 and
                    not any(word in clean_text.lower() for word in [
                        'cookie', 'privacy', 'terms', 'subscribe', 'sign in',
                        'login', 'newsletter', 'advertisement', 'sponsored'
                    ])):
                    headlines.append(clean_text)
        # De-duplicate and sort by length (longer headlines are usually more informative).
        unique_headlines = list(set(headlines))
        unique_headlines.sort(key=len, reverse=True)
        if unique_headlines:
            # Derive the source name from the URL host.
            source_name = url.split('/')[2].replace('www.', '')
            return f"Новости с {source_name}:\n" + "\n".join(unique_headlines[:5])
        else:
            return f"Не удалось найти новости на {url}"
    except Exception as e:
        return f"Ошибка при загрузке {url}: {str(e)}"
# Tool the agent calls to emit its final answer to the user.
final_answer = FinalAnswerTool()

# Hosted HF inference model backing the agent.
model = HfApiModel(
    max_tokens=1048,  # NOTE(review): 1048 looks like a typo for 1024 or 2048 — confirm intent
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
    token=os.environ.get('HF_TOKEN')  # auth token read from the environment (Space secret)
)

# Create the agent without authorized_imports
agent = CodeAgent(
    model=model,
    tools=[web_search, visit_webpage, final_answer],  # search, page-scrape, and final-answer tools
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates  # templates loaded from prompts.yaml above
)

# Launch the Gradio chat UI for this agent (blocking call, module side effect).
GradioUI(agent).launch()