Spaces:

Shreyas94
/

World_News

Sleeping

App Files Files Community

World_News / app.py

Shreyas94

Update app.py

fe7d500 verified over 1 year ago

raw

history blame contribute delete

3.09 kB

	import asyncio
	import logging
	import os

	import gradio as gr
	import nltk
	import requests
	from bs4 import BeautifulSoup
	from newsapi import NewsApiClient
	from transformers import pipeline

	# Configure logging: DEBUG level so every step of a request is traced.
	logging.basicConfig(level=logging.DEBUG)

	# Initialize the summarization pipeline from Hugging Face Transformers.
	# No model is named, so the library's default summarization model is used.
	summarizer = pipeline("summarization")

	# Download the NLTK sentence-tokenizer data used by nltk.sent_tokenize.
	# Newer NLTK releases load "punkt_tab" rather than "punkt"; requesting both
	# keeps tokenization working across versions. With its default arguments
	# nltk.download reports an unknown package id instead of raising.
	nltk.download('punkt')
	nltk.download('punkt_tab')

	# Initialize the News API client.
	# The key is taken from the NEWS_API_KEY environment variable when it is set,
	# so deployments can supply their own secret without editing the source.
	# NOTE(review): the fallback literal is a credential committed to source
	# control; it is kept only so existing deployments keep working. Rotate it
	# and drop the fallback once NEWS_API_KEY is configured.
	newsapi = NewsApiClient(
	    api_key=os.environ.get('NEWS_API_KEY', '5ab7bb1aaceb41b8993db03477098aad')
	)

	# Function to fetch content from a given URL
	def fetch_article_content(url, timeout=10):
	    """Download a page and return the text of its <h1> and <p> elements.

	    Args:
	        url: Address of the page to download.
	        timeout: Seconds passed to ``requests.get`` as its timeout, so a
	            stalled server cannot block the caller indefinitely.

	    Returns:
	        The text of every ``h1`` and ``p`` element joined by single spaces,
	        or an empty string if the download or parsing fails for any reason
	        (including a 4xx/5xx HTTP status).
	    """
	    try:
	        r = requests.get(url, timeout=timeout)
	        # Treat HTTP error pages as failures instead of scraping their text.
	        r.raise_for_status()
	        soup = BeautifulSoup(r.text, 'html.parser')
	        return ' '.join(node.text for node in soup.find_all(['h1', 'p']))
	    except Exception as e:
	        # Deliberately broad: this is a best-effort helper and callers rely on
	        # it returning "" rather than raising.
	        logging.error("Error fetching content from %s: %s", url, e)
	        return ""

	# Helper: pack sentences into chunks small enough for the summarizer
	def _chunk_sentences(sentences, max_words):
	    """Group consecutive sentences into strings of at most ~max_words words.

	    A single sentence longer than ``max_words`` becomes a chunk on its own.
	    """
	    chunks = []
	    current = []
	    current_words = 0
	    for sentence in sentences:
	        n_words = len(sentence.split())
	        if current and current_words + n_words > max_words:
	            chunks.append(' '.join(current))
	            current = []
	            current_words = 0
	        current.append(sentence)
	        current_words += n_words
	    if current:
	        chunks.append(' '.join(current))
	    return chunks


	# Helper: run the (blocking) summarization model over every chunk
	def _summarize_chunks(chunks):
	    """Return one summary string per chunk, in order."""
	    summaries = []
	    for chunk_text in chunks:
	        # truncation=True is a safety net for a chunk that still exceeds the
	        # model's maximum input length (e.g. one extremely long sentence).
	        summary = summarizer(
	            chunk_text,
	            max_length=120,
	            min_length=30,
	            do_sample=False,
	            truncation=True,
	        )
	        summaries.append(summary[0]['summary_text'])
	    return summaries


	# Function to summarize news articles based on a query
	async def summarize_news(query, num_results=3):
	    """Search for news matching ``query`` and return a combined summary.

	    Args:
	        query: Search text passed to ``newsapi.get_everything``.
	        num_results: Number of articles to request; converted to ``int``
	            before use because the UI slider may deliver a float.

	    Returns:
	        The per-chunk summaries joined by spaces, a short notice when no
	        article text could be retrieved, or
	        "An error occurred during summarization." if summarizing fails.
	    """
	    logging.debug(f"Query received: {query}")
	    logging.debug(f"Number of results requested: {num_results}")

	    # Blocking network/model calls are pushed to the default thread pool so
	    # this coroutine does not stall the event loop it runs on.
	    loop = asyncio.get_running_loop()

	    # Search for news articles
	    logging.debug("Searching for news articles...")

	    contents = []
	    try:
	        # newsapi-python rejects a non-int page_size, so normalise it here.
	        page_size = int(num_results)
	        news_results = await loop.run_in_executor(
	            None,
	            lambda: newsapi.get_everything(
	                q=query, language='en', page_size=page_size
	            ),
	        )
	        logging.debug(f"Search results: {news_results}")

	        urls = [
	            article['url']
	            for article in news_results['articles']
	            if article.get('url')
	        ]
	        for url in urls:
	            logging.debug(f"Fetching content from URL: {url}")
	        # Download all articles concurrently; fetch_article_content returns ""
	        # on failure instead of raising, so gather cannot be interrupted.
	        contents = await asyncio.gather(
	            *(loop.run_in_executor(None, fetch_article_content, url) for url in urls)
	        )
	    except Exception as e:
	        logging.error(f"Error fetching news articles: {e}")

	    aggregated_content = ' '.join(text for text in contents if text)
	    if not aggregated_content.strip():
	        # Nothing to summarize: say so rather than returning an empty box.
	        return "No article content could be retrieved for this query."

	    # Summarize the aggregated content
	    try:
	        sentences = nltk.sent_tokenize(aggregated_content)
	        # Chunk by a word budget rather than a sentence count so each chunk
	        # stays near the summarization model's input limit.
	        chunks = _chunk_sentences(sentences, max_words=400)

	        # Summarize each chunk separately
	        summaries = await loop.run_in_executor(None, _summarize_chunks, chunks)

	        # Combine all summaries
	        final_summary = ' '.join(summaries)

	        logging.debug(f"Final summarized text: {final_summary}")
	        return final_summary

	    except Exception as e:
	        logging.error(f"Error during summarization: {e}")
	        return "An error occurred during summarization."

	# Setting up Gradio interface: a query box and a result-count slider feed
	# summarize_news, whose returned string is shown in a textbox.
	iface = gr.Interface(
	    fn=summarize_news,
	    inputs=[
	        gr.Textbox(label="Query"),
	        # step=1 keeps the slider on whole numbers; without it Gradio picks a
	        # fractional step and can pass non-integer counts to the handler.
	        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results"),
	    ],
	    outputs="textbox",
	    title="News Summarizer",
	    description="Enter a query to get a consolidated summary of the top news articles.",
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()