Spaces:
Sleeping
Sleeping
| from newsapi import NewsApiClient | |
| from newspaper import Article | |
| import os | |
# Public API of this module. ``__all__`` is the name that
# ``from <module> import *`` actually consults; ``__export__`` has no special
# meaning to Python and is kept only as an alias for existing references.
__all__ = ["News"]
__export__ = __all__
class News:
    """Fetch headlines through NewsAPI and article bodies through newspaper.

    The NewsAPI client is stored on ``self.newsapi``. Sources whose name is
    listed in ``__EX_SOURCES__`` are excluded from every query this class
    makes.
    """

    # Source names (as reported by NewsAPI) that are never queried.
    __EX_SOURCES__ = {"ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News"}

    # Category names; not referenced by any method of this class.
    __CATEGORIES__ = {
        "business",
        "entertainment",
        "general",
        "health",
        "science",
        "sports",
        "technology",
    }

    def __init__(self):
        """Build the NewsAPI client from the ``NEWS_API_KEY`` environment variable.

        The key is read with ``os.environ.get``, so an unset variable yields
        ``None`` and is passed to the client as-is.
        """
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def _allowed_sources(self, category=None):
        """Return the source records for English/US sources that are not excluded."""
        response = self.newsapi.get_sources(language="en", country="us", category=category)
        return [
            source
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        ]

    def _sources_param(self, category=None):
        """Return the comma-separated source ids to send as ``sources``.

        NewsAPI documents ``sources`` as a comma-separated list of source
        *identifiers* (the ``id`` field), not display names. The ids are
        sorted so the same source set always produces the same request.
        """
        source_ids = {
            source["id"]
            for source in self._allowed_sources(category)
            if source.get("id")
        }
        return ",".join(sorted(source_ids))

    def get_sources(self, category=None):
        """Return the set of allowed source names.

        Args:
            category: Optional category forwarded to the NewsAPI sources
                endpoint; ``None`` applies no category filter argument.

        Returns:
            A ``set`` of source names for ``language="en"``, ``country="us"``
            with every name in ``__EX_SOURCES__`` removed.
        """
        return {source["name"] for source in self._allowed_sources(category)}

    def get_top_headlines(self, num_headlines=None, category=None):
        """Return top-headline articles from the allowed sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            category: Used to select which sources are queried.

        Returns:
            The ``"articles"`` list of the NewsAPI response.
        """
        response = self.newsapi.get_top_headlines(
            sources=self._sources_param(category),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_headlines(self, num_headlines=None, query=None):
        """Return articles matching ``query`` from the allowed sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            query: Forwarded to the client as ``q``.

        Returns:
            The ``"articles"`` list of the NewsAPI response.
        """
        response = self.newsapi.get_everything(
            q=query,
            sources=self._sources_param(),
            page_size=num_headlines,
        )
        return response["articles"]

    @staticmethod
    def _normalize_headline(headline, content):
        """Attach ``content`` and flatten ``headline`` in place; return it.

        * ``headline["content"]`` is set to ``content``.
        * ``headline["source"]`` is replaced by its ``"name"`` when it is still
          a dict, so a headline that was already processed is left intact.
        * ``"author"`` is dropped if present.
        """
        headline["content"] = content
        source = headline.get("source")
        if isinstance(source, dict):
            headline["source"] = source.get("name")
        # pop() instead of del: a missing "author" key must not raise.
        headline.pop("author", None)
        return headline

    def get_articles_from_headlines(self, headlines):
        """Download each headline's article and store its text on the headline.

        Every dict in ``headlines`` is modified in place (see
        ``_normalize_headline``) and the same list object is returned.
        Errors raised by ``Article.download()`` / ``Article.parse()`` are not
        caught here and propagate to the caller.

        Args:
            headlines: Iterable of article dicts, each with a ``"url"`` key.

        Returns:
            ``headlines``, after in-place modification.
        """
        for headline in headlines:
            article = Article(headline["url"])
            article.download()
            article.parse()
            self._normalize_headline(headline, article.text)
        return headlines