First_agent_template

Sleeping

App Files Files Community

MariaMaraShe committed on Feb 22, 2025

Commit

f0d1264

verified ·

1 Parent(s): 090cc63

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -4

app.py CHANGED Viewed

@@ -39,10 +39,9 @@ def get_current_time_in_timezone(timezone: str) -> str:
 @tool
 def visit_webpage(url: str) -> str:
-    """Извлекает текстовое содержимое веб-страницы по URL.
     Args:
-        url: Адрес веб-страницы для чтения
     """
     try:
         headers = {
@@ -50,9 +49,39 @@ def visit_webpage(url: str) -> str:
         }
         response = requests.get(url, headers=headers, timeout=30)
         soup = BeautifulSoup(response.text, 'html.parser')
         for tag in soup(['script', 'style', 'meta', 'link']):
             tag.decompose()
-        return soup.get_text(separator='\n', strip=True)
     except Exception as e:
         return f"Error fetching webpage: {str(e)}"

 @tool
 def visit_webpage(url: str) -> str:
+    """Извлекает текстовое содержимое веб-страницы по URL с улучшенной обработкой заголовков.
     Args:
+    url: Адрес веб-страницы для чтения
     """
     try:
         headers = {
         }
         response = requests.get(url, headers=headers, timeout=30)
         soup = BeautifulSoup(response.text, 'html.parser')
+        # Улучшенное извлечение заголовков
+        headlines = []
+        # Поиск заголовков в различных HTML-тегах
+        headline_tags = ['h1', 'h2', 'h3', 'h4', 'article-title', 'title']
+        for tag in headline_tags:
+            headlines.extend([
+                h.get_text(strip=True)
+                for h in soup.find_all(tag)
+                if h.get_text(strip=True)
+            ])
+        # Удаление лишних тегов и скриптов
         for tag in soup(['script', 'style', 'meta', 'link']):
             tag.decompose()
+        # Очистка и фильтрация заголовков
+        headlines = [
+            headline for headline in headlines
+            if len(headline) > 10 and len(headline) < 200
+        ]
+        # Возвращаем текст страницы с выделенными заголовками
+        full_text = soup.get_text(separator='\n', strip=True)
+        # Добавляем заголовки в начало текста
+        if headlines:
+            headlines_text = "Заголовки:\n" + "\n".join(headlines) + "\n\n"
+            return headlines_text + full_text
+        return full_text
     except Exception as e:
         return f"Error fetching webpage: {str(e)}"