Spaces:
Sleeping
Sleeping
| from smolagents.tools import Tool | |
| from markdownify import MarkdownConverter | |
class CustomMarkdownConverter(MarkdownConverter):
    """Markdown converter that produces text-only output.

    Two tag hooks of the base converter are overridden:

    * ``<img>`` elements are removed entirely.
    * ``<a>`` elements are reduced to their already-converted inner text,
      so the link target is not emitted.
    """

    def convert_img(self, el, text, parent_tags):
        # Images contribute nothing to the rendered markdown.
        return ""

    def convert_a(self, el, text, parent_tags):
        # Emit only the anchor's inner text; the link itself is discarded.
        return text
def markdown_summary(html, article_limit):
    """Convert an HTML fragment to markdown and cap its length.

    The HTML is rendered with ``CustomMarkdownConverter`` and stripped of
    leading/trailing whitespace.

    Args:
        html: HTML source to convert.
        article_limit: Maximum number of rendered characters to keep.

    Returns:
        The rendered text unchanged when it has at most ``article_limit``
        characters; otherwise its first ``article_limit`` characters
        followed by ``'...'``.
    """
    text = CustomMarkdownConverter().convert(html).strip()
    # Guard clause: only over-long text gets cut and marked as truncated.
    if len(text) > article_limit:
        return text[:article_limit] + '...'
    return text
class ReadRssFeedTool(Tool):
    """Tool that downloads an RSS feed and renders its items as one markdown text."""

    name = "read_rss_feed"
    description = "Read the articles from an RSS feed and return them in a single text with markdown format."
    inputs = {
        'url': {'type': 'string', 'description': 'The url of the RSS feed (example: https://www.eldiario.es/rss).'},
        'article_limit': {'type': 'integer', 'nullable': True, 'default': 512, 'description': 'Number of characters that will be considered for each article'},
        'feed_limit': {'type': 'integer', 'nullable': True, 'default': 4096, 'description': 'Number of characters that will be considered for the whole feed'},
    }
    output_type = "string"

    def forward(self, url: str, article_limit: int = 512, feed_limit: int = 4096) -> str:
        """Read all the articles from an RSS feed and return them as markdown.

        Each feed item becomes a ``## <title>`` heading, an optional line with
        its publication date, and a summary of its description produced by
        ``markdown_summary``.

        Args:
            url: Address of the RSS feed to download.
            article_limit: Maximum number of characters kept from each item's
                rendered description. ``None`` falls back to 512.
            feed_limit: Maximum number of characters of the whole returned
                text. ``None`` falls back to 4096.

        Returns:
            The markdown text, truncated to ``feed_limit`` characters.

        Raises:
            requests.exceptions.RequestException: If the download fails, times
                out, or the server answers with an HTTP error status.
        """
        # Imports stay local to the method, as in the original tool.
        from requests import get
        from rss_parser import RSSParser

        def tag_text(tag):
            # Feed elements can be absent (tag is None); treat them as empty.
            # str() keeps string content unchanged and also copes with content
            # the parser may have converted to another type (for example a
            # parsed date -- depends on the rss_parser version; TODO confirm).
            content = getattr(tag, "content", None)
            return "" if content is None else str(content)

        # Both limits are declared nullable in `inputs`, so a caller may pass
        # None explicitly; use the documented defaults in that case.
        if article_limit is None:
            article_limit = 512
        if feed_limit is None:
            feed_limit = 4096

        # Without a timeout, requests can block indefinitely on a stalled server.
        response = get(url, timeout=30)
        # Fail with a clear HTTP error instead of parsing an error page as RSS.
        response.raise_for_status()
        rss = RSSParser.parse(response.text)

        parts = ["# Articles\n\n"]
        for item in rss.channel.items:
            parts.append(f"## {tag_text(item.title)}\n")
            published = tag_text(item.pub_date)
            if published:
                parts.append(f"{published}\n")
            parts.append("\n")
            summary = markdown_summary(tag_text(item.description), article_limit)
            parts.append(f"{summary}\n\n")

        return "".join(parts)[:feed_limit]