# HFBlogPostSummarizer / tools /hf_blog_post_id_list.py
# sergiosampayo's picture
# huggingface-blog-post-summarizer
# 4ff5af2 verified
from smolagents import Tool
from typing import Any, Optional
class SimpleTool(Tool):
    """Tool that scrapes the Hugging Face blog index for blog post links."""

    name = "hf_blog_post_id_list"
    description = "Get the list of Hugging Face blog post IDs."
    inputs = {}
    output_type = "array"

    def forward(self) -> list[str]:
        """
        Get the list of Hugging Face blog post IDs.

        Fetches https://huggingface.co/blog and collects the ``href`` value of
        the thumbnail link inside each blog thumbnail container. The values
        are returned exactly as they appear in the page markup, in page order.

        Returns:
            list[str]: ``href`` values of the blog post links found on the
                page; empty when no matching element is present.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
        """
        # Imports are kept local to the method, as in the original tool.
        import re

        import requests
        from bs4 import BeautifulSoup

        # A timeout keeps the tool from hanging forever on a stalled connection.
        response = requests.get("https://huggingface.co/blog", timeout=30)
        # Fail loudly on 4xx/5xx: parsing an error page would silently yield [],
        # which is indistinguishable from "the blog has no posts".
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Containers whose class attribute is "SVELTE_HYDRATER contents" and
        # whose data-target attribute is "BlogThumbnail".
        blog_divs = soup.find_all(
            'div',
            attrs={'class': 'SVELTE_HYDRATER contents', 'data-target': 'BlogThumbnail'},
        )

        # Compile once instead of on every loop iteration.
        link_class = re.compile('flex lg:col-span-1 hover:shadow-alternate')

        # Extract hrefs from the nested <a> elements.
        hrefs = []
        for div in blog_divs:
            a_tag = div.find('a', attrs={'class': link_class})
            if a_tag and a_tag.has_attr('href'):
                hrefs.append(a_tag['href'])
        return hrefs