MariaMaraShe committed on
Commit
247a82f
·
verified ·
1 Parent(s): d539036

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -40
app.py CHANGED
@@ -31,65 +31,66 @@ def visit_webpage(url: str) -> str:
31
  """
32
  try:
33
  headers = {
34
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
35
- 'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
36
- 'Referer': 'https://www.google.com/'
 
 
 
37
  }
38
  response = requests.get(url, headers=headers, timeout=30)
 
39
 
40
- # Сначала пытаемся найти заголовки напрямую
41
- headlines = []
 
 
 
42
  patterns = [
43
  r'<h1[^>]*>(.*?)</h1>',
44
  r'<h2[^>]*>(.*?)</h2>',
45
  r'<h3[^>]*>(.*?)</h3>',
46
- r'class="[^"]*headline[^"]*"[^>]*>(.*?)</',
47
- r'class="[^"]*title[^"]*"[^>]*>(.*?)</'
 
48
  ]
49
 
 
50
  for pattern in patterns:
51
- found = re.findall(pattern, response.text, re.DOTALL | re.IGNORECASE)
52
- headlines.extend(found)
53
-
54
- # Очистка заголовков от HTML-тегов
55
- cleaned_headlines = []
56
- for headline in headlines:
57
- clean = re.sub(r'<[^>]+>', '', headline).strip()
58
- if 10 < len(clean) < 200 and not clean.startswith('{') and not clean.startswith('.'):
59
- cleaned_headlines.append(clean)
60
 
61
- # Возврат заголовков
62
- if cleaned_headlines:
63
- return "Заголовки новостей:\n" + "\n".join(cleaned_headlines[:10])
64
 
65
- # Если не нашли заголовки, вернем часть текста страницы
66
- text_content = re.sub(r'<[^>]+>', ' ', response.text)
67
- text_content = re.sub(r'\s+', ' ', text_content).strip()
68
- return "Содержимое страницы (фрагмент):\n" + text_content[:1000]
69
-
 
 
 
70
  except Exception as e:
71
  return f"Ошибка при загрузке страницы: {str(e)}"
72
 
73
  final_answer = FinalAnswerTool()
74
 
75
- # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
76
- # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
77
-
78
  model = HfApiModel(
79
- max_tokens=1048,
80
- temperature=0.5,
81
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
82
- custom_role_conversions=None,
83
- token=os.environ.get('HF_TOKEN')
84
  )
85
 
86
-
87
- # Import tool from Hub
88
- image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
89
-
90
- with open("prompts.yaml", 'r') as stream:
91
- prompt_templates = yaml.safe_load(stream)
92
-
93
  agent = CodeAgent(
94
  model=model,
95
  tools=[web_search, visit_webpage, final_answer],
@@ -99,7 +100,7 @@ agent = CodeAgent(
99
  planning_interval=None,
100
  name=None,
101
  description=None,
102
- prompt_templates=prompt_templates,
103
  )
104
 
105
 
 
31
  """
32
  try:
33
  headers = {
34
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
35
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
36
+ 'Accept-Language': 'en-US,en;q=0.5',
37
+ 'Connection': 'keep-alive',
38
+ 'Upgrade-Insecure-Requests': '1',
39
+ 'Cache-Control': 'max-age=0'
40
  }
41
  response = requests.get(url, headers=headers, timeout=30)
42
+ content = response.text
43
 
44
+ # Извлекаем текст между тегами title
45
+ title_match = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
46
+ title = title_match.group(1) if title_match else ""
47
+
48
+ # Ищем заголовки новостей с разными паттернами
49
  patterns = [
50
  r'<h1[^>]*>(.*?)</h1>',
51
  r'<h2[^>]*>(.*?)</h2>',
52
  r'<h3[^>]*>(.*?)</h3>',
53
+ r'class="[^"]*headline[^"]*"[^>]*>(.*?)</[^>]*>',
54
+ r'class="[^"]*title[^"]*"[^>]*>(.*?)</[^>]*>',
55
+ r'<a[^>]*class="[^"]*"[^>]*>(.*?)</a>'
56
  ]
57
 
58
+ headlines = []
59
  for pattern in patterns:
60
+ matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE)
61
+ for match in matches:
62
+ # Очищаем текст от HTML-тегов
63
+ clean_text = re.sub(r'<[^>]+>', '', match)
64
+ # Очищаем от лишних пробелов
65
+ clean_text = re.sub(r'\s+', ' ', clean_text).strip()
66
+ if clean_text and len(clean_text) > 20 and len(clean_text) < 200:
67
+ headlines.append(clean_text)
 
68
 
69
+ # Удаляем дубликаты
70
+ unique_headlines = list(set(headlines))
 
71
 
72
+ if unique_headlines:
73
+ return "Основные новости:\n" + "\n".join(unique_headlines[:10])
74
+ else:
75
+ # Если не нашли заголовки, берем просто текст
76
+ text_content = re.sub(r'<[^>]+>', ' ', content)
77
+ text_content = re.sub(r'\s+', ' ', text_content).strip()
78
+ return text_content[:1000]
79
+
80
  except Exception as e:
81
  return f"Ошибка при загрузке страницы: {str(e)}"
82
 
83
  final_answer = FinalAnswerTool()
84
 
 
 
 
85
  model = HfApiModel(
86
+ max_tokens=1048,
87
+ temperature=0.5,
88
+ model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
89
+ custom_role_conversions=None,
90
+ token=os.environ.get('HF_TOKEN')
91
  )
92
 
93
+ # Создаем агента без authorized_imports
 
 
 
 
 
 
94
  agent = CodeAgent(
95
  model=model,
96
  tools=[web_search, visit_webpage, final_answer],
 
100
  planning_interval=None,
101
  name=None,
102
  description=None,
103
+ prompt_templates=prompt_templates
104
  )
105
 
106