# README / fetch_top_news.py
# Repository page residue: uploader "Ismetdh"; commit "Upload 12 files" (04cdb6f, verified).
import requests
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment.
# This has to run before the module-level os.getenv("WORLD_NEWS_API") lookup.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
import os
from datetime import datetime
import requests
from categorizer import NewsCategorizer
from summarizer import Summarizer
import json
# Date string (YYYY-MM-DD) captured once, when this module is imported.
current_day = datetime.now().date().isoformat()

# World News API key taken from the environment (None when the variable is unset).
api_key = os.environ.get("WORLD_NEWS_API")

# Shared categorizer and summarizer objects, built once at import time and
# reused by get_top_news.
categorizer_instance = NewsCategorizer()
summarizer_instance = Summarizer(
    tokenizer_path=r'C:\Users\TDH\VIIIIIV\MVIIXXIV\2024\Senior Project 1\huggingface_model_deployment\FlanT5_base_fine_tuned_for_Summarization\local_google_flan_t5_tokenizer',
    model_path=r'C:\Users\TDH\VIIIIIV\MVIIXXIV\2024\Senior Project 1\huggingface_model_deployment\FlanT5_base_fine_tuned_for_Summarization',
)
def _save_news_to_file(news_list, filename="news_data.json"):
    """Write *news_list* to *filename* as JSON together with a save timestamp.

    The file is overwritten on every call. It holds one object with two keys:
    ``"timestamp"`` (local time, ``YYYY-MM-DD_HH-MM-SS``) and ``"news"``.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    data_to_save = {
        "timestamp": timestamp,
        "news": news_list,
    }
    # json.dump escapes non-ASCII characters by default, so pinning the
    # encoding does not change the bytes written; it only removes the
    # dependency on the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data_to_save, f)
    print(f"News data saved to {filename} at {timestamp}")


def get_top_news(language="en", date=None, number=30, timeout=30):
    """Fetch news articles, categorize and summarize them, and save them to disk.

    Each article returned by the API is passed through the module-level
    ``categorizer_instance`` and ``summarizer_instance``. After a successful
    (HTTP 200) response the resulting list is also written to
    ``news_data.json``.

    Args:
        language: Language code sent to the API, e.g. ``"en"`` or ``"th"``.
        date: Publication date as ``YYYY-MM-DD``. ``None`` (the default)
            means "today", resolved at call time so a long-running process
            does not keep requesting the day it was started on.
        number: Number of articles to request.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A list of dicts with the keys ``index``, ``title``, ``text``, ``url``,
        ``category``, ``summary`` and ``predicted_category``. The list is
        empty when the request fails, the API answers with a non-200 status,
        or the response body is not valid JSON; in those cases the error is
        printed and nothing is written to disk.
    """
    print("get_top_news was called")

    if date is None:
        date = datetime.now().strftime("%Y-%m-%d")

    base_url = "https://api.worldnewsapi.com/search-news"
    news_list = []  # Collected article records, in API order.

    # NOTE(review): confirm the 'published_date' and 'sort-by' parameter names
    # against the World News API search-news reference; query parameters the
    # API does not recognize may simply be ignored.
    params = {
        'api-key': api_key,
        'language': language,  # Language code, e.g., 'en' for English, 'th' for Thai
        'published_date': date,  # Date in YYYY-MM-DD format
        'sort-by': 'relevance',
        'number': number,  # Number of articles to retrieve
    }

    # Without a timeout requests waits indefinitely on a stalled connection.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        print("Error:", exc)
        return news_list

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return news_list

    try:
        payload = response.json()
    except ValueError as exc:  # Body was not valid JSON.
        print("Error:", exc)
        return news_list

    # A missing or null "news" entry is treated as "no articles".
    articles = payload.get("news") or []

    for i, article in enumerate(articles, start=1):
        # Normalize a missing or null body to "" so the categorizer always
        # receives a string.
        text = article.get("text") or ""
        predicted_category = categorizer_instance.predict_category(text)

        if text:
            summary = summarizer_instance.iterative_summarization(
                text=text,
                chunk_size=350,
                overlap_size=50,
                max_iterations=5,
                use_prompt_template=True,
            )
        else:
            summary = "No summary available"

        news_list.append({
            "index": i,
            "title": article.get("title", "No Title"),
            "text": article.get("text", "No Text"),  # Full article text as returned by the API
            "url": article.get("url", "No URL"),
            "category": article.get("category", "Others"),
            "summary": summary,
            "predicted_category": predicted_category,
        })

    _save_news_to_file(news_list)
    return news_list
# Example usage
# news = get_top_news(language="en", date="2024-11-19")
# for item in news:
# print(item)
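# Offline stub (disabled): a drop-in replacement for get_top_news that returns canned articles instead of calling the API.
# def get_top_news(language="en", date=current_day,number=30):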
# temp = [
# {"index": 1, "title": "World Cup 2024: The Road to the Final", "text": "The World Cup 2024 is heating up, with top teams battling for a spot in the final... (rest of the article text)", "url": "https://example.com/world-cup-2024", "category": "Sports", "summary": "The World Cup 2024 is nearing its final stages with intense matches between the world's top teams.", "predicted_category": "Sports"},
# {"index": 2, "title": "Tech Giants Invest in AI for 2024", "text": "Major tech companies are doubling down on AI technologies with multi-billion dollar investments... (rest of the article text)", "url": "https://example.com/tech-giants-ai-investment", "category": "Science/Tech", "summary": "Tech companies are boosting AI investments, with the focus on automation and machine learning innovations.", "predicted_category": "Science/Tech"},
# {"index": 3, "title": "Stocks Hit Record High Amid Global Economic Recovery", "text": "The stock market has reached new heights, with positive news about global economic recovery... (rest of the article text)", "url": "https://example.com/stock-market-record-high", "category": "Business", "summary": "The stock market reaches an all-time high as signs of global economic recovery boost investor confidence.", "predicted_category": "Business"},
# {"index": 4, "title": "New Legislation Aims to Tackle Climate Change", "text": "Lawmakers are debating new legislation aimed at addressing climate change with stricter regulations... (rest of the article text)", "url": "https://example.com/climate-change-legislation", "category": "Politics", "summary": "New legislation to combat climate change is being debated, focusing on stricter environmental regulations.", "predicted_category": "Politics"},
# {"index": 5, "title": "AI Revolutionizing Healthcare Diagnostics", "text": "AI systems are making waves in healthcare, improving diagnostics and treatment recommendations... (rest of the article text)", "url": "https://example.com/ai-healthcare-diagnostics", "category": "Science/Tech", "summary": "AI is revolutionizing healthcare by improving diagnostic accuracy and treatment efficiency.", "predicted_category": "Science/Tech"},
# {"index": 6, "title": "Oscars 2024: The Big Winners", "text": "The Oscars 2024 saw exciting wins, with new films taking home prestigious awards... (rest of the article text)", "url": "https://example.com/oscars-2024-winners", "category": "Entertainment", "summary": "The 2024 Oscars highlighted new talent and groundbreaking films, with major wins in multiple categories.", "predicted_category": "Entertainment"},
# {"index": 7, "title": "Government Launches New Initiative for Rural Development", "text": "A new initiative has been launched by the government to boost economic development in rural areas... (rest of the article text)", "url": "https://example.com/rural-development-initiative", "category": "Politics", "summary": "The government has launched a new initiative aimed at improving infrastructure and opportunities in rural regions.", "predicted_category": "Politics"},
# {"index": 8, "title": "The Future of Renewable Energy: Solar Dominates", "text": "Solar energy is expected to dominate the renewable energy market in the coming decades... (rest of the article text)", "url": "https://example.com/future-renewable-energy-solar", "category": "Science/Tech", "summary": "Solar energy is poised to take the lead in renewable energy, driven by technological advancements and policy support.", "predicted_category": "Science/Tech"},
# {"index": 9, "title": "2024 Tokyo Marathon: The Race of the Year", "text": "The Tokyo Marathon 2024 is set to attract the best runners from across the globe... (rest of the article text)", "url": "https://example.com/tokyo-marathon-2024", "category": "Sports", "summary": "The Tokyo Marathon 2024 will feature top athletes and promising new talent, making it one of the biggest races of the year.", "predicted_category": "Sports"},
# {"index": 10, "title": "Breaking News: Major Tech Merger Announced", "text": "Two major tech firms have announced a merger, which is expected to reshape the industry... (rest of the article text)", "url": "https://example.com/tech-merger-announcement", "category": "Business", "summary": "A major tech merger has been announced, signaling significant shifts in the technology sector.", "predicted_category": "Business"},
# {"index": 11, "title": "Political Tensions Rise Ahead of Election", "text": "With the upcoming election, political tensions are escalating as parties prepare for a fierce battle... (rest of the article text)", "url": "https://example.com/political-tensions-election", "category": "Politics", "summary": "Political tensions are rising as election campaigns intensify, with major candidates vying for leadership.", "predicted_category": "Politics"},
# {"index": 12, "title": "NBA Finals 2024: The Ultimate Showdown", "text": "The 2024 NBA Finals is shaping up to be one of the most thrilling matchups in basketball history... (rest of the article text)", "url": "https://example.com/nba-finals-2024", "category": "Sports", "summary": "The 2024 NBA Finals promises an epic showdown between two powerhouse teams, drawing attention from sports fans worldwide.", "predicted_category": "Sports"},
# {"index": 13, "title": "New Cybersecurity Threats: How to Protect Your Data", "text": "As cyber threats continue to evolve, experts are warning about new vulnerabilities affecting businesses and individuals... (rest of the article text)", "url": "https://example.com/cybersecurity-threats-2024", "category": "Science/Tech", "summary": "New cybersecurity threats are emerging, and experts provide tips on how to protect your data from malicious actors.", "predicted_category": "Science/Tech"},
# {"index": 14, "title": "Hollywood Stars Attend Gala for Charity", "text": "A star-studded charity gala was held last night, raising millions for a good cause... (rest of the article text)", "url": "https://example.com/hollywood-charity-gala", "category": "Entertainment", "summary": "A gala event in Hollywood saw major stars come together to raise millions for charity, highlighting philanthropic efforts in the industry.", "predicted_category": "Entertainment"},
# {"index": 15, "title": "Oil Prices Hit New Highs: Global Market Impact", "text": "Oil prices have hit record highs this month, affecting global markets and driving up fuel costs... (rest of the article text)", "url": "https://example.com/oil-prices-new-highs", "category": "Business", "summary": "Oil prices have surged, influencing global market trends and leading to higher fuel prices across the world.", "predicted_category": "Business"},
# {"index": 16, "title": "Exploring Mars: The Latest Space Mission Updates", "text": "NASA's latest Mars mission has yielded fascinating new insights about the planet's surface... (rest of the article text)", "url": "https://example.com/mars-space-mission-2024", "category": "Science/Tech", "summary": "NASA's latest Mars mission has provided new data about the planet's surface, shedding light on its potential for future exploration.", "predicted_category": "Science/Tech"},
# {"index": 17, "title": "New National Sports Policy to Boost Youth Involvement", "text": "The government has announced a new national sports policy aimed at increasing youth participation in sports... (rest of the article text)", "url": "https://example.com/national-sports-policy-2024", "category": "Sports", "summary": "A new national sports policy has been launched to boost youth engagement in physical activities and competitions.", "predicted_category": "Sports"},
# {"index": 18, "title": "Entertainment Industry Faces Challenges Amid Strike", "text": "The entertainment industry is facing significant disruptions as workers go on strike over pay and conditions... (rest of the article text)", "url": "https://example.com/entertainment-strike-2024", "category": "Entertainment", "summary": "The entertainment industry is experiencing major disruptions due to strikes over pay and working conditions.", "predicted_category": "Entertainment"},
# {"index": 19, "title": "China's Growing Influence in Global Politics", "text": "China's political influence continues to grow, shaping global policy decisions and economic trends... (rest of the article text)", "url": "https://example.com/china-global-influence", "category": "Politics", "summary": "China's political and economic influence is rapidly increasing, affecting global decision-making and alliances.", "predicted_category": "Politics"},
# {"index": 20, "title": "AI-Powered Tools Change the Game for Content Creators", "text": "Content creators are increasingly relying on AI-powered tools to streamline their production process and boost engagement... (rest of the article text)", "url": "https://example.com/ai-content-creators", "category": "Science/Tech", "summary": "AI-powered tools are revolutionizing content creation, helping creators improve efficiency and engagement with their audiences.", "predicted_category": "Science/Tech"}
# ]
# return temp