Spaces:

devanshu1121
/

newssummarization

Sleeping

App Files Files Community

newssummarization / app.py

devanshu1121

Update app.py

5f73691 verified 11 months ago

raw

history blame contribute delete

18.9 kB

	# import json
	# import os
	# from utils import save_company_news
	# from utils import sentiment_analysis_model
	# from utils import news_summarization, audio_output, Topic_finder
	# from collections import Counter
	# import time
	# import re
	# from deep_translator import GoogleTranslator
	# from pydub import AudioSegment
	# import gc
	# import torch


	# print("Company News Summarization")

	# company_name = input("Enter Company Name: ")

	# if company_name:
	# file_path = save_company_news(company_name)

	# if os.path.exists(file_path):
	# with open(file_path, "r", encoding="utf-8") as file:
	# articles = json.load(file)

	# for article in articles:
	# print(f"\nTitle: {article['title']}")
	# print(f"Content: {article['content'][:100]}...")
	# print(f"Read more: {article['url']}")

	# del articles
	# gc.collect()
	# else:
	# print("Failed to fetch news. Try again.")
	# else:
	# print("Please enter a company name.")

	# with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
	# data = json.load(file)

	# for article in data:
	# topics = Topic_finder(article['title'])

	# sentiment = sentiment_analysis_model(article['content'])
	# article["sentiment"] = sentiment['sentiment']

	# del sentiment
	# gc.collect()

	# summary = news_summarization(article["content"])
	# article["summary"] = summary

	# article["topics"] = topics

	# if torch.cuda.is_available():
	# torch.cuda.empty_cache()

	# gc.collect()

	# with open(f"Company/{company_name}.json", "w", encoding="utf-8") as file:
	# json.dump(data, file, indent=4)

	# with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
	# articles = json.load(file)

	# sentiment_counts = Counter(article["sentiment"] for article in articles)

	# print("Sentiment Counts:")
	# print("Positive:", sentiment_counts.get("Positive", 0))
	# print("Negative:", sentiment_counts.get("Negative", 0))
	# print("Neutral:", sentiment_counts.get("Neutral", 0))

	# del articles
	# del sentiment_counts
	# gc.collect()

	# with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
	# data = json.load(file)

	# translator = GoogleTranslator(source="en", target="hi")

	# audio_folder = "audio"
	# os.makedirs(audio_folder, exist_ok=True)

	# for file in os.listdir(audio_folder):
	# file_path = os.path.join(audio_folder, file)
	# if os.path.isfile(file_path):
	# os.remove(file_path)

	# text_data = ""
	# audio_files = []

	# def split_text(text, max_length=4500):
	# sentences = re.split(r'(?<=[.!?])\s+', text)
	# chunks = []
	# current_chunk = ""

	# for sentence in sentences:
	# if len(current_chunk) + len(sentence) + 1 <= max_length:
	# current_chunk += " " + sentence if current_chunk else sentence
	# else:
	# chunks.append(current_chunk)
	# current_chunk = sentence

	# if current_chunk:
	# chunks.append(current_chunk)

	# return chunks

	# for i, article in enumerate(data, start=1):
	# title_translated = translator.translate(article['title'])

	# content_chunks = split_text(article['content'])
	# translated_chunks = []

	# for chunk in content_chunks:
	# try:
	# translated_chunk = translator.translate(chunk)
	# translated_chunks.append(translated_chunk)
	# time.sleep(0.5)
	# except Exception as e:
	# print(f"Error translating chunk: {str(e)}")
	# translated_chunks.append(f"Translation error: {str(e)}")

	# content_translated = " ".join(translated_chunks)

	# del content_chunks
	# gc.collect()

	# article_text = (f"अब, आप लेख संख्या {i} सुन रहे हैं जिसका शीर्षक है: {title_translated}\n"
	# f"अब, आप लेख संख्या {i} की सामग्री सुन रहे हैं।\n"
	# f"सामग्री: {content_translated}\n\n")

	# text_data += article_text

	# audio_file = f"{audio_folder}/article_{i}.mp3"
	# audio_output(article_text, audio_file)
	# audio_files.append(audio_file)

	# del article_text
	# del content_translated
	# del translated_chunks
	# gc.collect()

	# if torch.cuda.is_available():
	# torch.cuda.empty_cache()

	# time.sleep(1)

	# output_file = f"Company/{company_name}_translated.txt"
	# with open(output_file, "w", encoding="utf-8") as file:
	# file.write(text_data)

	# del text_data
	# gc.collect()

	# def combine_audio_files(audio_folder, output_file):
	# try:
	# print(f"Combining audio files from {audio_folder}...")
	# audio_files = [f for f in os.listdir(audio_folder) if f.endswith('.mp3') and f != os.path.basename(output_file)]

	# if not audio_files:
	# print("No audio files found to combine.")
	# return False

	# audio_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]) if x.split('_')[-1].split('.')[0].isdigit() else 0)
	# print(f"Found {len(audio_files)} audio files to combine.")

	# combined = AudioSegment.empty()

	# for file in audio_files:
	# file_path = os.path.join(audio_folder, file)
	# try:
	# audio = AudioSegment.from_mp3(file_path)
	# combined += audio
	# print(f"Added {file}")

	# del audio
	# gc.collect()
	# except Exception as e:
	# print(f"Error processing {file}: {str(e)}")

	# combined.export(output_file, format="mp3")
	# print(f"Successfully combined audio files into {output_file}")

	# del combined
	# gc.collect()

	# return True

	# except Exception as e:
	# print(f"Error combining audio files: {str(e)}")
	# return False

	# audio_folder = "audio"
	# output_file = "combined_news.mp3"
	# combine_audio_files(audio_folder, output_file)
	# print("Audio combining process completed!")

	# if torch.cuda.is_available():
	# torch.cuda.empty_cache()

	# gc.collect()

	import streamlit as st
	import json
	import os
	from utils import save_company_news
	from utils import sentiment_analysis_model
	from utils import news_summarization, audio_output, Topic_finder
	from collections import Counter
	import time
	import re
	from deep_translator import GoogleTranslator
	from pydub import AudioSegment
	import gc
	import torch

	# Configure the browser tab title, icon and layout for the Streamlit page
	st.set_page_config(page_title=" News Summarization and Text-to-Speech Application ", page_icon="📰", layout="wide")

	# Make sure the working folders used by the rest of the script exist
	for folder in ("Company", "audio"):
	    os.makedirs(folder, exist_ok=True)

	def split_text(text, max_length=4500):
	    """Split *text* into chunks of at most *max_length* characters.

	    The text is cut after ``.``, ``!`` or ``?`` followed by whitespace, and
	    consecutive sentences are packed (joined by a single space) into a chunk
	    for as long as the chunk stays within *max_length*.

	    A single sentence longer than *max_length* is wrapped at whitespace (and
	    hard-broken inside a word only when one word alone is too long), so no
	    returned chunk ever exceeds *max_length*.  Empty chunks are never
	    returned; empty input yields an empty list.

	    Args:
	        text: The string to split.
	        max_length: Maximum number of characters per chunk (must be >= 1).

	    Returns:
	        A list of non-empty strings, in original order.

	    Raises:
	        ValueError: If *max_length* is smaller than 1.
	    """
	    # Imported here so the function does not depend on a file-level import.
	    import textwrap

	    if max_length < 1:
	        raise ValueError("max_length must be a positive integer")

	    chunks = []
	    current_chunk = ""

	    for sentence in re.split(r'(?<=[.!?])\s+', text):
	        if not sentence:
	            # Trailing whitespace after the last sentence produces an empty
	            # piece; skipping it avoids a dangling space in the chunk.
	            continue

	        if len(sentence) > max_length:
	            # Flush what we have, then break the oversized sentence up so
	            # every piece respects the limit.
	            if current_chunk:
	                chunks.append(current_chunk)
	            pieces = textwrap.wrap(
	                sentence,
	                width=max_length,
	                break_long_words=True,
	                break_on_hyphens=False,
	            )
	            # Keep the last (possibly short) piece open so following
	            # sentences can still be packed onto it.
	            chunks.extend(pieces[:-1])
	            current_chunk = pieces[-1] if pieces else ""
	        elif not current_chunk:
	            # Starting a fresh chunk never needs the separator space, so a
	            # sentence of exactly max_length characters fits on its own.
	            current_chunk = sentence
	        elif len(current_chunk) + len(sentence) + 1 <= max_length:
	            current_chunk += " " + sentence
	        else:
	            chunks.append(current_chunk)
	            current_chunk = sentence

	    if current_chunk:
	        chunks.append(current_chunk)

	    return chunks

	def combine_audio_files(audio_folder, output_file):
	    """Concatenate the MP3 files found in *audio_folder* into *output_file*.

	    Files are appended in ascending order of the number that follows the last
	    underscore in their name (``article_3.mp3`` -> 3); names without such a
	    number sort as 0.  A file whose name equals the basename of *output_file*
	    is ignored so an earlier combined file in the same folder is not fed back
	    into the result.  A file that cannot be decoded is reported and skipped.

	    Progress and problems are reported through Streamlit status messages.

	    Args:
	        audio_folder: Directory to scan for ``.mp3`` files.
	        output_file: Path of the combined MP3 to write.

	    Returns:
	        True when a combined file was exported; False when there was nothing
	        to combine, no input could be decoded, or any other error occurred.
	    """
	    def _trailing_number(filename):
	        # "article_12.mp3" -> 12; anything without a numeric suffix -> 0.
	        suffix = filename.split('_')[-1].split('.')[0]
	        return int(suffix) if suffix.isdigit() else 0

	    try:
	        st.info(f"Combining audio files from {audio_folder}...")
	        skip_name = os.path.basename(output_file)
	        audio_files = [
	            f for f in os.listdir(audio_folder)
	            if f.endswith('.mp3') and f != skip_name
	        ]

	        if not audio_files:
	            st.warning("No audio files found to combine.")
	            return False

	        audio_files.sort(key=_trailing_number)
	        st.info(f"Found {len(audio_files)} audio files to combine.")

	        combined = AudioSegment.empty()
	        loaded = 0

	        for file in audio_files:
	            file_path = os.path.join(audio_folder, file)
	            try:
	                audio = AudioSegment.from_mp3(file_path)
	                combined += audio
	                loaded += 1

	                # Drop the decoded segment right away to keep peak memory low.
	                del audio
	                gc.collect()
	            except Exception as e:
	                st.error(f"Error processing {file}: {str(e)}")

	        if loaded == 0:
	            # Every input failed to decode: do not export an empty file and
	            # report it as a success.
	            st.error("None of the audio files could be read; nothing was combined.")
	            return False

	        combined.export(output_file, format="mp3")
	        st.success(f"Successfully combined audio files into {output_file}")

	        del combined
	        gc.collect()

	        return True

	    except Exception as e:
	        st.error(f"Error combining audio files: {str(e)}")
	        return False

	def _clear_folder(folder):
	    """Delete every regular file directly inside *folder*; subfolders are kept."""
	    for name in os.listdir(folder):
	        path = os.path.join(folder, name)
	        if os.path.isfile(path):
	            os.remove(path)


	def _analyze_articles(articles):
	    """Add ``sentiment``, ``summary`` and ``topics`` keys to each article in place."""
	    progress_bar = st.progress(0)
	    total_articles = len(articles)

	    for i, article in enumerate(articles, start=1):
	        topics = Topic_finder(article['title'])

	        sentiment = sentiment_analysis_model(article['content'])
	        article["sentiment"] = sentiment['sentiment']

	        del sentiment
	        gc.collect()

	        article["summary"] = news_summarization(article["content"])
	        article["topics"] = topics

	        # Free cached GPU memory (when a GPU is present) between articles.
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()

	        gc.collect()
	        progress_bar.progress(i / total_articles)


	def _show_sentiment_counts(articles):
	    """Render how many articles are Positive / Negative / Neutral as three metrics."""
	    sentiment_counts = Counter(article["sentiment"] for article in articles)

	    st.write("### Sentiment Analysis")
	    col1, col2, col3 = st.columns(3)
	    col1.metric("Positive", sentiment_counts.get("Positive", 0))
	    col2.metric("Negative", sentiment_counts.get("Negative", 0))
	    col3.metric("Neutral", sentiment_counts.get("Neutral", 0))


	def _narrate_article(translator, article, number, audio_folder):
	    """Translate one article, write its audio file, and return the narration text.

	    The audio is written to ``<audio_folder>/article_<number>.mp3`` through
	    ``audio_output`` (presumably a text-to-speech helper -- confirm in utils).
	    """
	    try:
	        title_translated = translator.translate(article['title'])
	    except Exception as e:
	        # A failed title must not abort the whole run; chunk failures below
	        # are tolerated the same way.  Fall back to the untranslated title.
	        st.error(f"Error translating title: {str(e)}")
	        title_translated = article['title']

	    translated_chunks = []
	    for chunk in split_text(article['content']):
	        try:
	            translated_chunks.append(translator.translate(chunk))
	            # Brief pause between requests to the translation service.
	            time.sleep(0.5)
	        except Exception as e:
	            st.error(f"Error translating chunk: {str(e)}")
	            translated_chunks.append(f"Translation error: {str(e)}")

	    content_translated = " ".join(translated_chunks)

	    article_text = (f"अब, आप लेख संख्या {number} सुन रहे हैं जिसका शीर्षक है: {title_translated}\n"
	                    f"अब, आप लेख संख्या {number} की सामग्री सुन रहे हैं।\n"
	                    f"सामग्री: {content_translated}\n\n")

	    audio_file = f"{audio_folder}/article_{number}.mp3"
	    audio_output(article_text, audio_file)

	    return article_text


	def process_company_news(company_name):
	    """Run the full news pipeline for *company_name* and render progress.

	    Stages, each shown under its own spinner:
	      1. Fetch the news with ``save_company_news`` and preview the articles.
	      2. Add sentiment, summary and topics to every article and save them to
	         ``Company/<company_name>.json``.
	      3. Show the sentiment counts.
	      4. Translate every article to Hindi, write one MP3 per article into
	         ``audio/`` and the full text to
	         ``Company/<company_name>_translated.txt``.
	      5. Combine the per-article MP3s into ``combined_news.mp3``.

	    Args:
	        company_name: Name typed by the user; it is used in file names.

	    Returns:
	        False when the news could not be fetched or no article was found;
	        True once all stages ran (a failed audio combine is reported on the
	        page but does not change the return value).
	    """
	    # NOTE(review): presumably the same file save_company_news returns below;
	    # both paths are kept as in the original -- confirm against utils.
	    json_path = f"Company/{company_name}.json"

	    with st.spinner("Fetching company news..."):
	        file_path = save_company_news(company_name)

	        # Guard against a falsy return value as well as a missing file.
	        if not file_path or not os.path.exists(file_path):
	            st.error("Failed to fetch news. Try again.")
	            return False

	        with open(file_path, "r", encoding="utf-8") as file:
	            articles = json.load(file)

	        if not articles:
	            # Nothing to analyze, translate or narrate.
	            st.warning(f"No articles found for {company_name}.")
	            return False

	        st.success(f"Found {len(articles)} articles for {company_name}")

	        # Display a preview of the articles
	        with st.expander("Preview Articles"):
	            for article in articles:
	                st.subheader(article['title'])
	                st.write(f"{article['content'][:100]}...")
	                st.write(f"[Read more]({article['url']})")

	        del articles
	        gc.collect()

	    with st.spinner("Analyzing sentiment, extracting topics, and generating summaries..."):
	        with open(json_path, "r", encoding="utf-8") as file:
	            data = json.load(file)

	        _analyze_articles(data)

	        with open(json_path, "w", encoding="utf-8") as file:
	            json.dump(data, file, indent=4)

	    with st.spinner("Counting sentiment..."):
	        # The enriched articles are still in memory; no need to re-read them.
	        _show_sentiment_counts(data)
	        gc.collect()

	    with st.spinner("Translating content and generating audio..."):
	        translator = GoogleTranslator(source="en", target="hi")

	        audio_folder = "audio"
	        os.makedirs(audio_folder, exist_ok=True)

	        # Clear previous audio files
	        _clear_folder(audio_folder)

	        narrations = []
	        progress_bar = st.progress(0)

	        for i, article in enumerate(data, start=1):
	            narrations.append(_narrate_article(translator, article, i, audio_folder))
	            gc.collect()

	            if torch.cuda.is_available():
	                torch.cuda.empty_cache()

	            progress_bar.progress(i / len(data))
	            time.sleep(1)

	        output_file = f"Company/{company_name}_translated.txt"
	        with open(output_file, "w", encoding="utf-8") as file:
	            file.write("".join(narrations))

	        del narrations
	        gc.collect()

	    with st.spinner("Combining audio files..."):
	        output_file = "combined_news.mp3"
	        combine_success = combine_audio_files(audio_folder, output_file)

	        if combine_success:
	            st.success("Audio combining process completed!")
	        else:
	            st.error("Failed to combine audio files.")

	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()

	    gc.collect()

	    return True

	# Main app interface
	st.title("Company News Summarization and Audio Generation")

	with st.sidebar:
	    st.header("Enter Company Details")
	    # Strip surrounding whitespace so "   " counts as empty and
	    # "Tesla " / "Tesla" map to the same files.
	    company_name = st.text_input("Company Name").strip()
	    process_button = st.button("Process Company News", type="primary")

	# Process data when button is clicked
	if process_button:
	    if not company_name:
	        st.error("Please enter a company name.")
	    elif "/" in company_name or "\\" in company_name:
	        # The name is interpolated into file paths further down; refuse
	        # separators so user input cannot point outside the data folders.
	        st.error("Company name must not contain '/' or '\\'.")
	    elif process_company_news(company_name):
	        st.session_state.processing_complete = True
	        st.session_state.company_name = company_name

	# Show results after processing
	if st.session_state.get('processing_complete'):
	    company_name = st.session_state.company_name

	    def _article_number(filename):
	        # Numeric part after the last underscore ("article_7.mp3" -> 7), else 0.
	        tail = filename.split('_')[-1].split('.')[0]
	        if tail.isdigit():
	            return int(tail)
	        return 0

	    st.header(f"Results for {company_name}")

	    # One tab per kind of output
	    summary_tab, text_tab, audio_tab = st.tabs(["Summary", "Translated Text", "Audio"])

	    with summary_tab:
	        st.subheader("News Summary")
	        try:
	            with open(f"Company/{company_name}.json", "r", encoding="utf-8") as handle:
	                stored_articles = json.load(handle)

	            for position, entry in enumerate(stored_articles, 1):
	                with st.expander(f"Article {position}: {entry['title']}"):
	                    st.write(f"Summary: {entry['summary']}")
	                    st.write(f"Sentiment: {entry['sentiment']}")
	                    st.write(f"Topics: {', '.join(entry['topics'])}")
	                    st.write(f"URL: {entry['url']}")
	        except Exception as e:
	            st.error(f"Error loading summary data: {str(e)}")

	    with text_tab:
	        st.subheader("Translated Text (Hindi)")
	        try:
	            with open(f"Company/{company_name}_translated.txt", "r", encoding="utf-8") as handle:
	                hindi_text = handle.read()
	            st.download_button(
	                label="Download Translated Text",
	                data=hindi_text,
	                file_name=f"{company_name}_translated.txt",
	                mime="text/plain",
	            )
	            st.text_area("Content", hindi_text, height=400)
	        except Exception as e:
	            st.error(f"Error loading translated text: {str(e)}")

	    with audio_tab:
	        st.subheader("Audio Files")

	        st.write("### Combined Audio")
	        try:
	            with open("combined_news.mp3", "rb") as handle:
	                combined_payload = handle.read()

	            st.audio(combined_payload, format="audio/mp3")
	            st.download_button(
	                label="Download Combined Audio",
	                data=combined_payload,
	                file_name="combined_news.mp3",
	                mime="audio/mp3",
	            )
	        except Exception as e:
	            st.error(f"Error loading combined audio: {str(e)}")

	        st.write("### Individual Article Audio Files")
	        try:
	            # Per-article files, ordered by their article number
	            mp3_names = sorted(
	                (name for name in os.listdir("audio") if name.endswith('.mp3')),
	                key=_article_number,
	            )

	            for mp3_name in mp3_names:
	                with st.expander(f"{mp3_name}"):
	                    with open(f"audio/{mp3_name}", "rb") as handle:
	                        payload = handle.read()
	                    st.audio(payload, format="audio/mp3")
	                    st.download_button(
	                        label=f"Download {mp3_name}",
	                        data=payload,
	                        file_name=mp3_name,
	                        mime="audio/mp3",
	                    )
	        except Exception as e:
	            st.error(f"Error loading individual audio files: {str(e)}")

	# Instructions at the bottom
	with st.expander("How to use this app"):
	    # The tab descriptions are indented under step 4 so that Markdown nests
	    # them inside that step instead of starting a separate list.
	    st.write("""
	    1. Enter the name of a company in the sidebar.
	    2. Click 'Process Company News' button to start the analysis.
	    3. Wait for the processing to complete (this may take some time depending on the number of articles).
	    4. View the results in the different tabs:
	       - Summary: See sentiment analysis, topics, and summaries of each article
	       - Translated Text: View the Hindi translation of all articles
	       - Audio: Listen to or download the audio files in Hindi
	    """)