First_agent_template

Sleeping

App Files Files Community

dygoo committed on Feb 14, 2025

Commit

2a54448

verified ·

1 Parent(s): 8db3b25

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -75

app.py CHANGED Viewed

@@ -6,8 +6,8 @@ import yaml
 from tools.final_answer import FinalAnswerTool
 from bs4 import BeautifulSoup
 from typing import List, Dict
-pip install feedparser
-import feedparser
 from Gradio_UI import GradioUI
@@ -17,75 +17,25 @@ from Gradio_UI import GradioUI
 search_tool = DuckDuckGoSearchTool()
 @tool
 def get_latest_news() -> Dict[str, List[Dict]]:
     """
     Tool returns latest news from major news outlets using RSS feeds.
-    Focuses on politics, economics, and world news from reputable sources.
     Returns:
-        Dict[str, List[Dict]]: A dictionary where keys are news sources and values are lists of news items
-                              containing title, link, and publication date.
     """
     rss_feeds = {
         "Reuters": {
             "World": "https://www.rss.reuters.com/world",
-            "Business": "https://www.rss.reuters.com/business",
-            "Politics": "https://www.rss.reuters.com/politics"
-        },
-        "BBC": {
-            "World": "http://feeds.bbci.co.uk/news/world/rss.xml",
-            "Business": "http://feeds.bbci.co.uk/news/business/rss.xml",
-            "Politics": "http://feeds.bbci.co.uk/news/politics/rss.xml"
-        },
-        "The Economist": {
-            "All": "https://www.economist.com/rss",
-            "Economics": "https://www.economist.com/finance-and-economics/rss.xml",
-            "World": "https://www.economist.com/international/rss.xml"
-        },
-        "Financial Times": {
-            "World": "https://www.ft.com/world?format=rss",
-            "Economics": "https://www.ft.com/global-economy?format=rss",
-            "Politics": "https://www.ft.com/politics?format=rss"
-        },
-        "WSJ": {
-            "World": "https://feeds.a.dj.com/rss/RSSWorldNews.xml",
-            "Economics": "https://feeds.a.dj.com/rss/RSSEconomy.xml",
-            "Politics": "https://feeds.a.dj.com/rss/RSSPolitics.xml"
-        },
-        "Bloomberg": {
-            "Politics": "https://www.bloomberg.com/politics/feeds/site.xml",
-            "Economics": "https://www.bloomberg.com/economics/feeds/site.xml",
-            "World": "https://www.bloomberg.com/world/feeds/site.xml"
         },
         "CNN": {
-            "World": "http://rss.cnn.com/rss/cnn_world.rss",
-            "Politics": "http://rss.cnn.com/rss/cnn_politics.rss",
-            "Business": "http://rss.cnn.com/rss/money_latest.rss"
-        },
-        "Politico": {
-            "Politics": "https://www.politico.com/rss/politicopicks.xml",
-            "Congress": "https://www.politico.com/rss/congress.xml",
-            "Economy": "https://www.politico.com/rss/economy.xml"
-        },
-        "Foreign Policy": {
-            "All": "https://foreignpolicy.com/feed/"
-        },
-        "Foreign Affairs": {
-            "All": "https://www.foreignaffairs.com/rss.xml"
         }
     }
-    def clean_summary(summary: str, max_length: int = 200) -> str:
-        """Clean and truncate summary text."""
-        if not summary:
-            return ''
-        # Remove HTML tags and excessive whitespace
-        from bs4 import BeautifulSoup
-        cleaned = BeautifulSoup(summary, 'html.parser').get_text()
-        cleaned = ' '.join(cleaned.split())
-        return cleaned[:max_length] + '...' if len(cleaned) > max_length else cleaned
     news_items = {}
     for source, feeds in rss_feeds.items():
@@ -93,26 +43,30 @@ def get_latest_news() -> Dict[str, List[Dict]]:
         for feed_name, feed_url in feeds.items():
             try:
-                feed = feedparser.parse(feed_url)
-                for entry in feed.entries[:5]:  # Get top 5 stories from each feed
-                    # Get publication date
-                    pub_date = entry.get('published_parsed', None)
-                    if pub_date:
-                        pub_date = datetime.fromtimestamp(
-                            datetime(*pub_date[:6]).timestamp(),
-                            pytz.UTC
-                        ).strftime('%Y-%m-%d %H:%M:%S UTC')
-                    # Get summary from either summary or description field
-                    summary = entry.get('summary', entry.get('description', ''))
                     news_item = {
                         'category': feed_name,
-                        'title': entry.title,
-                        'link': entry.link,
-                        'published': pub_date,
-                        'summary': clean_summary(summary)
                     }
                     news_items[source].append(news_item)
@@ -128,8 +82,6 @@ def get_latest_news() -> Dict[str, List[Dict]]:
     return news_items
 final_answer = FinalAnswerTool()
 # If the agent does not answer, the model is overloaded; please use another model or the following Hugging Face endpoint, which also serves Qwen2.5 Coder:

 from tools.final_answer import FinalAnswerTool
 from bs4 import BeautifulSoup
 from typing import List, Dict
+import urllib.request
 from Gradio_UI import GradioUI
 search_tool = DuckDuckGoSearchTool()
 @tool
 def get_latest_news() -> Dict[str, List[Dict]]:
     """
     Tool returns latest news from major news outlets using RSS feeds.
+    Uses only built-in Python libraries.
     Returns:
+        Dict[str, List[Dict]]: A dictionary where keys are news sources and values are lists of news items.
     """
     rss_feeds = {
         "Reuters": {
             "World": "https://www.rss.reuters.com/world",
+            "Business": "https://www.rss.reuters.com/business"
         },
         "CNN": {
+            "Top Stories": "http://rss.cnn.com/rss/cnn_topstories.rss",
+            "World": "http://rss.cnn.com/rss/cnn_world.rss"
         }
     }
     news_items = {}
     for source, feeds in rss_feeds.items():
         for feed_name, feed_url in feeds.items():
             try:
+                # Add headers to avoid potential blocks
+                headers = {
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+                }
+                req = urllib.request.Request(feed_url, headers=headers)
+                response = urllib.request.urlopen(req, timeout=10)
+                xml_data = response.read().decode('utf-8')
+                # Parse XML
+                root = ET.fromstring(xml_data)
+                # Find all item elements (news articles)
+                for item in root.findall('.//item')[:5]:  # Get top 5 stories
+                    title = item.find('title')
+                    link = item.find('link')
+                    pub_date = item.find('pubDate')
+                    description = item.find('description')
                     news_item = {
                         'category': feed_name,
+                        'title': title.text if title is not None else 'No title',
+                        'link': link.text if link is not None else '',
+                        'published': pub_date.text if pub_date is not None else '',
+                        'summary': description.text[:200] + '...' if description is not None and description.text else ''
                     }
                     news_items[source].append(news_item)
     return news_items
 final_answer = FinalAnswerTool()
 # If the agent does not answer, the model is overloaded; please use another model or the following Hugging Face endpoint, which also serves Qwen2.5 Coder: