carloscapote committed on
Commit
e06addd
·
1 Parent(s): a3e63cc

truncate articles from the rss feed

Browse files
Files changed (4) hide show
  1. Gradio_UI.py +2 -3
  2. app.py +2 -1
  3. ollama.env +1 -1
  4. tools/read_rss_feed.py +28 -6
Gradio_UI.py CHANGED
@@ -290,8 +290,7 @@ class GradioUI:
290
  [stored_messages, text_input],
291
  ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
292
 
293
- share = not os.getenv("GRADIO_SHARE") == "false"
294
- print("GRADIO_SHARE is set to", os.getenv("GRADIO_SHARE"))
295
- demo.launch(debug=True, share=share, **kwargs)
296
 
297
  __all__ = ["stream_to_gradio", "GradioUI"]
 
290
  [stored_messages, text_input],
291
  ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
292
 
293
+ gradio_share = not os.getenv("GRADIO_SHARE") == "false"
294
+ demo.launch(debug=True, share=gradio_share, **kwargs)
 
295
 
296
  __all__ = ["stream_to_gradio", "GradioUI"]
app.py CHANGED
@@ -32,7 +32,8 @@ def get_current_time_in_timezone(timezone: str) -> str:
32
 
33
  def choose_model():
34
  if os.getenv("OLLAMA_MODEL"):
35
- print("Using an Ollama model")
 
36
  return LiteLLMModel(
37
  model_id=os.getenv("OLLAMA_MODEL"),
38
  api_base=os.getenv("OLLAMA_ENDPOINT"),
 
32
 
33
  def choose_model():
34
  if os.getenv("OLLAMA_MODEL"):
35
+ print("Using an Ollama model: ", os.getenv("OLLAMA_MODEL"))
36
+
37
  return LiteLLMModel(
38
  model_id=os.getenv("OLLAMA_MODEL"),
39
  api_base=os.getenv("OLLAMA_ENDPOINT"),
ollama.env CHANGED
@@ -1,5 +1,5 @@
1
  OLLAMA_ENDPOINT="http://localhost:11434"
2
- OLLAMA_MODEL="ollama_chat/llama3-groq-tool-use"
3
  OLLAMA_KEY=""
4
 
5
  GRADIO_SHARE=false
 
1
  OLLAMA_ENDPOINT="http://localhost:11434"
2
+ OLLAMA_MODEL="ollama_chat/mistral:7b"
3
  OLLAMA_KEY=""
4
 
5
  GRADIO_SHARE=false
tools/read_rss_feed.py CHANGED
@@ -1,13 +1,35 @@
1
  from smolagents.tools import Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  class ReadRssFeedTool(Tool):
4
  name = "read_rss_feed"
5
- description = "Read the articles from an RSS feed."
6
- inputs = {'url': {'type': 'string', 'description': 'The url of the RSS feed (example: https://www.eldiario.es/rss).'}}
 
 
 
 
7
  output_type = "string"
8
 
9
- def forward(self, url: str) -> str:
10
- """Read articles from an RSS feed."""
11
  import re
12
  from requests import get
13
  from rss_parser import RSSParser
@@ -18,6 +40,6 @@ class ReadRssFeedTool(Tool):
18
  for item in rss.channel.items:
19
  articles += "## " + item.title.content + "\n"
20
  articles += item.pub_date.content + "\n\n"
21
- content = markdownify(item.description.content).strip()
22
  articles += content + "\n\n"
23
- return articles
 
1
  from smolagents.tools import Tool
2
+ from markdownify import MarkdownConverter
3
+
4
class CustomMarkdownConverter(MarkdownConverter):
    """Markdown converter that drops images and flattens links to their text."""

    def convert_img(self, el, text, parent_tags):
        """Render an image element as an empty string so it is omitted."""
        return ""

    def convert_a(self, el, text, parent_tags):
        """Render a link element as its text only, discarding the link itself."""
        return text
13
+
14
def markdown_summary(html, article_limit):
    """Render an HTML fragment as markdown, truncated to a character budget.

    Args:
        html: HTML markup to convert with ``CustomMarkdownConverter``.
        article_limit: Maximum number of rendered characters to keep, or
            ``None`` to keep the whole text.

    Returns:
        The converted text with surrounding whitespace stripped. When it is
        longer than ``article_limit``, only the first ``article_limit``
        characters are kept and ``'...'`` is appended, so the result may be
        up to three characters longer than the limit.
    """
    rendered = CustomMarkdownConverter().convert(html).strip()
    # The tool schema declares this limit as nullable; comparing an int
    # against None would raise TypeError, so None means "do not truncate".
    if article_limit is None or len(rendered) <= article_limit:
        return rendered
    return rendered[:article_limit] + '...'
20
 
21
  class ReadRssFeedTool(Tool):
22
  name = "read_rss_feed"
23
+ description = "Read the articles from an RSS feed and return them in a single text with markdown format."
24
+ inputs = {
25
+ 'url': {'type': 'string', 'description': 'The url of the RSS feed (example: https://www.eldiario.es/rss).'},
26
+ 'article_limit': {'type': 'integer', 'nullable': True, 'default': 512, 'description': 'Number of characters that will be considered for each article'},
27
+ 'feed_limit': {'type': 'integer', 'nullable': True, 'default': 4096, 'description': 'Number of characters that will be considered for the whole article'},
28
+ }
29
  output_type = "string"
30
 
31
+ def forward(self, url: str, article_limit: int=512, feed_limit: int=4096) -> str:
32
+ """Read all the articles from a given RSS feed and return them as a single markdown formatted text."""
33
  import re
34
  from requests import get
35
  from rss_parser import RSSParser
 
40
  for item in rss.channel.items:
41
  articles += "## " + item.title.content + "\n"
42
  articles += item.pub_date.content + "\n\n"
43
+ content = markdown_summary(item.description.content, article_limit)
44
  articles += content + "\n\n"
45
+ return articles[:feed_limit]