First_agent_template

Sleeping

App Files Community

MariaMaraShe committed on Feb 23, 2025

Commit

247a82f

verified ·

1 Parent(s): d539036

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -40

app.py CHANGED Viewed

@@ -31,65 +31,66 @@ def visit_webpage(url: str) -> str:
     """
     try:
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-            'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
-            'Referer': 'https://www.google.com/'
         }
         response = requests.get(url, headers=headers, timeout=30)
-        # Сначала пытаемся найти заголовки напрямую
-        headlines = []
         patterns = [
             r'<h1[^>]*>(.*?)</h1>',
             r'<h2[^>]*>(.*?)</h2>',
             r'<h3[^>]*>(.*?)</h3>',
-            r'class="[^"]*headline[^"]*"[^>]*>(.*?)</',
-            r'class="[^"]*title[^"]*"[^>]*>(.*?)</'
         ]
         for pattern in patterns:
-            found = re.findall(pattern, response.text, re.DOTALL | re.IGNORECASE)
-            headlines.extend(found)
-        # Очистка заголовков от HTML-тегов
-        cleaned_headlines = []
-        for headline in headlines:
-            clean = re.sub(r'<[^>]+>', '', headline).strip()
-            if 10 < len(clean) < 200 and not clean.startswith('{') and not clean.startswith('.'):
-                cleaned_headlines.append(clean)
-        # Возврат заголовков
-        if cleaned_headlines:
-            return "Заголовки новостей:\n" + "\n".join(cleaned_headlines[:10])
-        # Если не нашли заголовки, вернем часть текста страницы
-        text_content = re.sub(r'<[^>]+>', ' ', response.text)
-        text_content = re.sub(r'\s+', ' ', text_content).strip()
-        return "Содержимое страницы (фрагмент):\n" + text_content[:1000]
     except Exception as e:
         return f"Ошибка при загрузке страницы: {str(e)}"
 final_answer = FinalAnswerTool()
-# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
-# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
 model = HfApiModel(
-max_tokens=1048,
-temperature=0.5,
-model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
-custom_role_conversions=None,
-token=os.environ.get('HF_TOKEN')
 )
-# Import tool from Hub
-image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
-with open("prompts.yaml", 'r') as stream:
-    prompt_templates = yaml.safe_load(stream)
 agent = CodeAgent(
     model=model,
     tools=[web_search, visit_webpage, final_answer],
@@ -99,7 +100,7 @@ agent = CodeAgent(
     planning_interval=None,
     name=None,
     description=None,
-    prompt_templates=prompt_templates,
 )

     """
     try:
         headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1',
+            'Cache-Control': 'max-age=0'
         }
         response = requests.get(url, headers=headers, timeout=30)
+        content = response.text
+        # Извлекаем текст между тегами title
+        title_match = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
+        title = title_match.group(1) if title_match else ""
+        # Ищем заголовки новостей с разными паттернами
         patterns = [
             r'<h1[^>]*>(.*?)</h1>',
             r'<h2[^>]*>(.*?)</h2>',
             r'<h3[^>]*>(.*?)</h3>',
+            r'class="[^"]*headline[^"]*"[^>]*>(.*?)</[^>]*>',
+            r'class="[^"]*title[^"]*"[^>]*>(.*?)</[^>]*>',
+            r'<a[^>]*class="[^"]*"[^>]*>(.*?)</a>'
         ]
+        headlines = []
         for pattern in patterns:
+            matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE)
+            for match in matches:
+                # Очищаем текст от HTML-тегов
+                clean_text = re.sub(r'<[^>]+>', '', match)
+                # Очищаем от лишних пробелов
+                clean_text = re.sub(r'\s+', ' ', clean_text).strip()
+                if clean_text and len(clean_text) > 20 and len(clean_text) < 200:
+                    headlines.append(clean_text)
+        # Удаляем дубликаты
+        unique_headlines = list(set(headlines))
+        if unique_headlines:
+            return "Основные новости:\n" + "\n".join(unique_headlines[:10])
+        else:
+            # Если не нашли заголовки, берем просто текст
+            text_content = re.sub(r'<[^>]+>', ' ', content)
+            text_content = re.sub(r'\s+', ' ', text_content).strip()
+            return text_content[:1000]
     except Exception as e:
         return f"Ошибка при загрузке страницы: {str(e)}"
 final_answer = FinalAnswerTool()
 model = HfApiModel(
+    max_tokens=1048,
+    temperature=0.5,
+    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
+    custom_role_conversions=None,
+    token=os.environ.get('HF_TOKEN')
 )
+# Создаем агента без authorized_imports
 agent = CodeAgent(
     model=model,
     tools=[web_search, visit_webpage, final_answer],
     planning_interval=None,
     name=None,
     description=None,
+    prompt_templates=prompt_templates
 )