Spaces:
Sleeping
Sleeping
File size: 4,418 Bytes
9b5b26a 6a5d3c0 9b5b26a c19d193 fde458a e36e662 6aae614 ddd5aa2 b77a101 8fe992b 9b5b26a 5df72d6 9b5b26a 3d1237b 9b5b26a 8c01ffb bc41013 fde458a bc41013 f0d1264 bc41013 247a82f a29018c bc41013 247a82f f0d1264 a29018c fde458a a29018c f0d1264 247a82f fde458a 247a82f a29018c 247a82f a29018c 247a82f f0d1264 a29018c 247a82f a29018c f0d1264 247a82f a29018c 247a82f a29018c 247a82f bc41013 a29018c bc41013 6aae614 ae7a494 e121372 247a82f 13d500a 8c01ffb 247a82f 8c01ffb 8fe992b e36e662 f707a69 8c01ffb 247a82f 8fe992b 9b5b26a 8c01ffb | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import os
import datetime
import requests
import pytz
import yaml
import re
from bs4 import BeautifulSoup
from tools.final_answer import FinalAnswerTool
# Load the agent's prompt templates from the YAML file next to this script.
# The encoding is pinned so parsing does not depend on the host locale default.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Web search tool instance that is passed to the agent's tool list.
web_search = DuckDuckGoSearchTool()
from Gradio_UI import GradioUI
# Placeholder tool that performs no real work; use it as a template for your own tools.
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # it's important to specify the return type
    # Keep this docstring layout (description, then an Args entry per parameter);
    # everything else about the tool may be modified freely.
    """A tool that does nothing yet
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply
@tool
def visit_webpage(url: str) -> str:
    """Fetches a web page and returns up to five headline-like strings found in its HTML.

    Args:
        url: Address of the web page to read

    Returns:
        A text block with the source host followed by the headlines, or a message saying that nothing was found or that loading failed.
    """
    # Function-scope imports keep the tool self-contained.
    import html
    from urllib.parse import urlparse

    # Browser-like request headers (presumably so sites serve their regular HTML page).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive'
    }

    # Text that starts with one of these words is rejected as a headline.
    skip = r'(?!Privacy|Cookie|Terms|Log in|Sign up|Subscribe|Menu|Navigation)'
    # Headline candidates: h1-h3 elements, plus any element whose class
    # attribute contains "headline" or "title".
    patterns = [
        rf'<{tag}[^>]*>({skip}[^<]+)</{tag}>' for tag in ('h1', 'h2', 'h3')
    ] + [
        rf'class="[^"]*{name}[^"]*"[^>]*>({skip}[^<]+)</[^>]*>' for name in ('headline', 'title')
    ]
    # Candidates containing any of these words are dropped as uninformative.
    blocked_words = (
        'cookie', 'privacy', 'terms', 'subscribe', 'sign in',
        'login', 'newsletter', 'advertisement', 'sponsored',
    )

    # The broad catch is deliberate: the tool reports failures as text instead
    # of raising, exactly as before.
    try:
        response = requests.get(url, headers=headers, timeout=30)
        # Report HTTP errors (404, 500, ...) instead of scanning an error page.
        response.raise_for_status()
        # Without a declared charset requests decodes text/* as ISO-8859-1,
        # which garbles UTF-8 pages; fall back to the detected encoding.
        if 'charset' not in response.headers.get('Content-Type', '').lower():
            response.encoding = response.apparent_encoding
        content = response.text

        headlines = []
        for pattern in patterns:
            for match in re.findall(pattern, content, re.DOTALL | re.IGNORECASE):
                # Captures are "[^<]+", so they never contain tags; only
                # entities and whitespace need normalising.
                clean_text = re.sub(r'\s+', ' ', html.unescape(match)).strip()
                lowered = clean_text.lower()
                if 20 < len(clean_text) < 200 and not any(
                    word in lowered for word in blocked_words
                ):
                    headlines.append(clean_text)

        # Drop duplicates while keeping first-seen order, then put the longest
        # first. The sort is stable, so equal-length headlines keep a
        # deterministic order (a set would not guarantee that).
        unique_headlines = sorted(dict.fromkeys(headlines), key=len, reverse=True)

        if not unique_headlines:
            return f"Не удалось найти новости на {url}"

        # Source name is the URL host without a leading "www.".
        host = urlparse(url).netloc
        source_name = host[4:] if host.startswith('www.') else host
        return f"Новости с {source_name}:\n" + "\n".join(unique_headlines[:5])
    except Exception as e:
        return f"Ошибка при загрузке {url}: {str(e)}"
# Tool instance included in the agent's tool list below.
final_answer = FinalAnswerTool()

# Model backing the agent; the API token is read from the HF_TOKEN
# environment variable (None when the variable is not set).
model = HfApiModel(
    max_tokens=1048,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
    token=os.environ.get('HF_TOKEN')
)

# Create the agent without authorized_imports
agent = CodeAgent(
    model=model,
    tools=[web_search, visit_webpage, final_answer],
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

# Launch the Gradio UI for the agent (runs at import time, as before).
GradioUI(agent).launch()