|
|
import base64
import html
import json
import os
import subprocess
import sys
import threading
import time
from io import BytesIO
from typing import Dict, Any, List

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
from matplotlib.figure import Figure
from PIL import Image, ImageEnhance
|
|
|
|
|
|
|
|
# Page-level Streamlit settings, gathered in one mapping and applied in a
# single call.
_PAGE_SETTINGS = {
    "page_title": "News Summarization & TTS",
    "page_icon": "📰",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
|
|
|
|
|
|
|
|
def start_api():
    """Launch the API server script (``api.py``) as a child process.

    The script is started with the interpreter that is running this app
    (``sys.executable``) instead of whatever ``python`` resolves to on PATH,
    so the child process uses the same environment as the app itself.

    Returns:
        subprocess.Popen: Handle of the spawned server process.
    """
    # "api.py" is a relative path, so it is resolved against the current
    # working directory of this process.
    process = subprocess.Popen([sys.executable, "api.py"])
    print(f"Started API server with PID {process.pid}")
    return process
|
|
|
|
|
|
|
|
@st.cache_resource
def ensure_api_running():
    """Make sure the backend API is reachable, starting it if necessary.

    The API's ``/docs`` page is probed first. If it does not answer with
    HTTP 200, the server is launched through ``start_api()`` and probed again
    every two seconds until it answers or the attempts are used up. Progress
    is reported in the sidebar and on stdout.

    Returns:
        bool: True if the docs page answered with HTTP 200, False otherwise.
    """
    docs_url = f"{API_BASE_URL}/docs"
    max_retries = 15

    try:
        response = requests.get(docs_url, timeout=2)
        if response.status_code == 200:
            st.sidebar.success("✅ API server is running")
            print("API already running")
            return True
    except requests.exceptions.RequestException as e:
        print(f"API not running: {str(e)}")

    print("Starting API server...")
    st.sidebar.info("Starting API server...")

    api_process = start_api()

    # Bounded loop: every unsuccessful probe counts as an attempt, whether it
    # failed with a connection error or with a non-200 status, so the wait
    # always terminates.
    for attempt in range(1, max_retries + 1):
        time.sleep(2)
        try:
            response = requests.get(docs_url, timeout=2)
            if response.status_code == 200:
                st.sidebar.success("✅ API server is running")
                print("API server started successfully")
                return True
        except requests.exceptions.RequestException:
            # Server is not accepting connections yet; reported just below.
            pass

        print(f"Waiting for API to start... (attempt {attempt}/{max_retries})")

        # No point in waiting any longer once the child process has exited.
        if api_process.poll() is not None:
            print(f"API process exited with code {api_process.returncode}")
            break

    st.sidebar.error("❌ Failed to start API server")
    print("Failed to start API server")
    return False
|
|
|
|
|
|
|
|
# Base URL of the backend API; the analysis and audio endpoint URLs used in
# this file are built from it.
API_BASE_URL = "http://localhost:8000"
|
|
|
|
|
|
|
|
def plot_sentiment_distribution(sentiment_data):
    """Draw a bar chart of article counts per sentiment category.

    Args:
        sentiment_data: Mapping of sentiment label to article count. Entries
            whose count is not greater than zero are left out of the chart.

    Returns:
        matplotlib.figure.Figure: Figure containing the bar chart.
    """
    sentiment_colors = {
        "Positive": "#10B981",
        "Slightly Positive": "#10B981",
        "Negative": "#EF4444",
        "Slightly Negative": "#EF4444",
    }
    fallback_color = "#6B7280"  # used for neutral and any other label

    categories = []
    counts = []
    for category, count in sentiment_data.items():
        if count > 0:
            categories.append(category)
            counts.append(count)
    colors = [sentiment_colors.get(label, fallback_color) for label in categories]

    # Build the figure directly instead of through pyplot: pyplot keeps every
    # figure it creates registered globally until it is closed explicitly,
    # and its helper functions act on whichever figure is "current".
    fig = Figure(figsize=(6, 4))
    ax = fig.subplots()
    bars = ax.bar(categories, counts, color=colors)

    # Print each bar's value slightly above its top edge.
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height + 0.1,
            str(int(height)),
            ha="center",
            va="bottom",
        )

    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Number of Articles")
    ax.set_title("Sentiment Distribution")

    ax.tick_params(axis="x", labelrotation=45)
    fig.tight_layout()

    return fig
|
|
|
|
|
|
|
|
def display_word_cloud(topics):
    """Render the given topics as a word-cloud figure.

    Args:
        topics: Iterable of topics; they are converted to text, joined with
            spaces and passed to ``WordCloud.generate``.

    Returns:
        matplotlib.figure.Figure: Figure showing the word cloud with the
        axes hidden.
    """
    # Imported here rather than at module level, so the package is only
    # required when this function is actually called.
    from wordcloud import WordCloud

    text = " ".join(str(topic) for topic in topics)

    # NOTE(review): WordCloud.generate presumably rejects text without any
    # words (e.g. an empty topic list) — confirm and guard in the caller.
    wordcloud = WordCloud(
        width=400,
        height=200,
        background_color="white",
        colormap="viridis",
        max_words=100,
        contour_width=1,
    ).generate(text)

    # Build the figure directly instead of through pyplot, which keeps every
    # figure it creates registered globally until it is closed explicitly.
    fig = Figure(figsize=(10, 5))
    ax = fig.subplots()
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")

    return fig
|
|
|
|
|
|
|
|
def generate_example_output(company_name: str) -> str:
    """
    Generate output in the example format for the given company.

    Posts the company name to the ``/api/complete_analysis`` endpoint and
    reshapes the reply into the example layout. The ``Audio`` entry is
    replaced by a placeholder label instead of the raw value.

    Args:
        company_name: Name of the company to analyze.

    Returns:
        The formatted JSON as a string. If the request or the reshaping fails
        for any reason, a JSON object with ``error`` and ``message`` keys is
        returned instead; no exception is propagated.
    """
    try:
        url = f"{API_BASE_URL}/api/complete_analysis"
        # Bounded wait, matching the timeout of the main analysis request, so
        # an unresponsive server cannot block the page indefinitely.
        response = requests.post(
            url,
            json={"company_name": company_name},
            timeout=180,
        )
        response.raise_for_status()
        data = response.json()

        comparative = data["Comparative Sentiment Score"]
        formatted_output = {
            "Company": data["Company"],
            "Articles": data["Articles"],
            "Comparative Sentiment Score": {
                "Sentiment Distribution": comparative["Sentiment Distribution"],
                "Coverage Differences": comparative["Coverage Differences"],
                "Topic Overlap": comparative["Topic Overlap"],
            },
            "Final Sentiment Analysis": data["Final Sentiment Analysis"],
            "Audio": "[Play Hindi Speech]" if data.get("Audio") else "No audio available",
        }

        return json.dumps(formatted_output, indent=2)

    except Exception as e:
        # Deliberate catch-all: the caller displays whatever string comes
        # back, so failures are reported as JSON rather than raised.
        return json.dumps({
            "error": str(e),
            "message": "Failed to generate example output",
        }, indent=2)
|
|
|
|
|
|
|
|
# Stylesheet for the custom CSS classes referenced by the HTML snippets this
# page emits (headers, article cards, sentiment labels, topic tags, audio box).
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.2rem;
        font-weight: 600;
        color: #1E3A8A;
        margin-bottom: 1rem;
    }
    .sub-header {
        font-size: 1.5rem;
        font-weight: 500;
        color: #3B82F6;
        margin-top: 1.5rem;
        margin-bottom: 0.5rem;
    }
    .info-text {
        color: #6B7280;
        font-style: italic;
    }
    .section-divider {
        margin-top: 2rem;
        margin-bottom: 2rem;
        border-bottom: 1px solid #E5E7EB;
    }
    .stButton>button {
        background-color: #2563EB;
        color: white;
        border-radius: 0.375rem;
        padding: 0.5rem 1rem;
        font-weight: 500;
    }
    .stButton>button:hover {
        background-color: #1D4ED8;
    }
    .article-card {
        padding: 1rem;
        border-radius: 0.5rem;
        border: 1px solid #E5E7EB;
        margin-bottom: 1rem;
    }
    .sentiment-positive {
        color: #10B981;
        font-weight: 500;
    }
    .sentiment-negative {
        color: #EF4444;
        font-weight: 500;
    }
    .sentiment-neutral {
        color: #6B7280;
        font-weight: 500;
    }
    .topic-tag {
        background-color: #E5E7EB;
        color: #374151;
        border-radius: 9999px;
        padding: 0.25rem 0.75rem;
        margin-right: 0.5rem;
        margin-bottom: 0.5rem;
        display: inline-block;
        font-size: 0.875rem;
    }
    .audio-container {
        margin-top: 1rem;
        padding: 1rem;
        border-radius: 0.5rem;
        background-color: #F3F4F6;
    }
    .stAlert {
        border-radius: 0.5rem;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Page title and a short introduction.
_PAGE_HEADING = "<h1 class='main-header'>📰 News Summarization & Text-to-Speech</h1>"
_PAGE_INTRO = "This application extracts news articles about a company, performs sentiment analysis, conducts comparative analysis, and generates a text-to-speech output in Hindi. Enter a company name to get started."
st.markdown(_PAGE_HEADING, unsafe_allow_html=True)
st.markdown(_PAGE_INTRO, unsafe_allow_html=True)

# Check for (and if needed start) the backend before drawing the controls;
# the result decides whether the analyze button is enabled.
api_running = ensure_api_running()

# Sidebar controls, rendered top to bottom in this order.
with st.sidebar:
    st.markdown("## Input Settings")
    company_name = st.text_input("Company Name", value="Tesla")

    st.markdown("## Audio Settings")
    # NOTE(review): audio_speed is not referenced anywhere else in the
    # visible source — confirm whether it should be sent to the API.
    audio_speed = st.select_slider(
        "TTS Speech Speed:",
        options=["Slow", "Normal", "Fast"],
        value="Normal",
    )
    st.markdown("---")

    st.markdown("## Developer Options")
    show_json = st.checkbox("Show JSON output in example format")
    st.markdown("---")

    st.markdown("## About")
    st.info("This application was developed for news analysis and translation. It uses web scraping, NLP, and TTS technologies to provide insights about companies.")

    # The button stays disabled while the API is unreachable.
    analyze_button = st.button("Analyze Company News", disabled=not api_running)
|
|
|
|
|
|
|
|
def _escape(value):
    """Return ``value`` as text that is safe to embed in the HTML snippets below."""
    return html.escape(str(value), quote=True)


# CSS class used for each sentiment label; anything else is shown as neutral.
_SENTIMENT_CSS_CLASSES = {
    "Positive": "sentiment-positive",
    "Slightly Positive": "sentiment-positive",
    "Negative": "sentiment-negative",
    "Slightly Negative": "sentiment-negative",
}

# Main view: run the analysis when the button was pressed, otherwise show the
# getting-started hints. Values coming back from the API are escaped before
# they are placed into markup rendered with unsafe_allow_html=True.
if analyze_button and company_name and api_running:
    with st.spinner(f"Analyzing news for {company_name}. This may take a moment..."):
        try:
            api_response = requests.post(
                f"{API_BASE_URL}/api/complete_analysis",
                json={"company_name": company_name},
                timeout=180,
            )
            api_response.raise_for_status()
            analysis = api_response.json()
            comparative = analysis["Comparative Sentiment Score"]

            st.markdown(f"<h2 class='sub-header'>Analysis Results for {_escape(analysis['Company'])}</h2>", unsafe_allow_html=True)

            # ---- Sentiment overview -------------------------------------
            st.markdown("<h3 class='sub-header'>Sentiment Overview</h3>", unsafe_allow_html=True)

            sentiment_data = comparative["Sentiment Distribution"]

            col1, col2 = st.columns([3, 2])

            with col1:
                # "Slightly" variants are folded into their main category.
                positive_count = sentiment_data.get("Positive", 0) + sentiment_data.get("Slightly Positive", 0)
                negative_count = sentiment_data.get("Negative", 0) + sentiment_data.get("Slightly Negative", 0)
                neutral_count = sentiment_data.get("Neutral", 0)
                total_count = positive_count + negative_count + neutral_count

                if positive_count > negative_count and positive_count > neutral_count:
                    tone = "mostly positive"
                elif negative_count > positive_count and negative_count > neutral_count:
                    tone = "mostly negative"
                else:
                    tone = "balanced"

                sentiment_text = (
                    f"The company has {tone} coverage "
                    f"({positive_count}/{total_count} positive, "
                    f"{negative_count}/{total_count} negative, "
                    f"{neutral_count}/{total_count} neutral)."
                )
                st.write(sentiment_text)

                # A chart failure should not hide the rest of the results.
                try:
                    fig = plot_sentiment_distribution(sentiment_data)
                    st.pyplot(fig)
                except Exception as e:
                    st.warning(f"Could not create sentiment chart: {str(e)}")

            with col2:
                st.markdown("<h4>Key Insights</h4>", unsafe_allow_html=True)
                st.write(analysis["Final Sentiment Analysis"])

                common_topics = comparative["Topic Overlap"].get("Common Topics", [])
                if common_topics:
                    st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
                    for topic in common_topics:
                        st.markdown(f"<span class='topic-tag'>{_escape(topic)}</span>", unsafe_allow_html=True)

            # ---- Hindi audio summary ------------------------------------
            if "Audio" in analysis and analysis["Audio"]:
                st.markdown("<h3 class='sub-header'>Hindi Audio Summary</h3>", unsafe_allow_html=True)

                audio_message = analysis["Audio"]

                if audio_message == "Failed to generate audio":
                    st.warning("Hindi audio could not be generated. However, you can still read the Hindi text below.")
                else:
                    try:
                        audio_file_path = analysis.get("_audio_file_path")

                        if audio_file_path:
                            # Only the file name is used; the API serves it
                            # from its own audio endpoint.
                            audio_filename = os.path.basename(audio_file_path)
                            audio_url = f"{API_BASE_URL}/api/audio/{audio_filename}"
                        else:
                            st.info("Audio is available but the path was not provided.")
                            audio_url = None

                        if audio_url:
                            audio_response = requests.get(audio_url, timeout=30)
                            if audio_response.status_code == 200:
                                # NOTE(review): presumably every st.markdown
                                # call is rendered as its own element, so this
                                # opening tag does not wrap the player below —
                                # confirm before relying on the styling.
                                st.markdown("<div class='audio-container'>", unsafe_allow_html=True)

                                # Hand the bytes to the player directly; no
                                # temporary file is left in the working
                                # directory.
                                st.audio(audio_response.content, format="audio/mp3")

                                st.markdown(f"<a href='{_escape(audio_url)}' download='hindi_summary.mp3'>Download Hindi Audio</a>", unsafe_allow_html=True)
                            else:
                                st.warning(f"Unable to load audio file (HTTP {audio_response.status_code}). You can still read the Hindi text below.")
                        else:
                            st.info("Hindi audio summary would be available here.")
                    except Exception as e:
                        st.warning(f"Error playing audio: {str(e)}. You can still read the Hindi text below.")

                with st.expander("Show Hindi Text"):
                    # `or` also covers a key that is present but null/empty.
                    hindi_text = analysis.get("Hindi Summary") or "Hindi text not available."

                    # Split on the danda followed by a space and show each
                    # sentence as its own paragraph, restoring the danda the
                    # split removed.
                    for paragraph in hindi_text.split("। "):
                        if paragraph.strip():
                            if not paragraph.strip().endswith("।"):
                                paragraph += "।"
                            st.markdown(f"<p style='font-size: 16px; margin-bottom: 10px;'>{_escape(paragraph)}</p>", unsafe_allow_html=True)

                st.markdown("</div>", unsafe_allow_html=True)

            st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)

            # ---- Individual articles ------------------------------------
            st.markdown("<h3 class='sub-header'>News Articles</h3>", unsafe_allow_html=True)

            for i, article in enumerate(analysis["Articles"]):
                with st.container():
                    st.markdown("<div class='article-card'>", unsafe_allow_html=True)

                    sentiment_class = _SENTIMENT_CSS_CLASSES.get(article["Sentiment"], "sentiment-neutral")

                    st.markdown(f"<h4>{i+1}. {_escape(article['Title'])}</h4>", unsafe_allow_html=True)
                    st.markdown(f"<span class='{sentiment_class}'>{_escape(article['Sentiment'])}</span>", unsafe_allow_html=True)

                    st.write(article["Summary"])

                    for topic in article["Topics"]:
                        st.markdown(f"<span class='topic-tag'>{_escape(topic)}</span>", unsafe_allow_html=True)

                    st.markdown("</div>", unsafe_allow_html=True)

            # ---- Comparative analysis -----------------------------------
            st.markdown("<h3 class='sub-header'>Comparative Analysis</h3>", unsafe_allow_html=True)

            st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
            common_topics = comparative["Topic Overlap"].get("Common Topics", [])
            if common_topics:
                for topic in common_topics:
                    st.markdown(f"<span class='topic-tag'>{_escape(topic)}</span>", unsafe_allow_html=True)
            else:
                st.write("No common topics found across articles.")

            st.markdown("<h4>Coverage Comparison</h4>", unsafe_allow_html=True)
            comparisons = comparative.get("Coverage Differences", [])
            if comparisons:
                # Only the first comparison is shown inline; the full list is
                # in the expander below.
                first_comparison = comparisons[0]
                st.write(first_comparison.get("Comparison", ""))
                st.markdown(f"<p class='info-text'>{_escape(first_comparison.get('Impact', ''))}</p>", unsafe_allow_html=True)
            else:
                st.write("No comparative insights available.")

            with st.expander("View All Comparisons"):
                for i, comparison in enumerate(comparisons):
                    st.markdown(f"<p><strong>{i+1}.</strong> {_escape(comparison.get('Comparison', ''))}</p>", unsafe_allow_html=True)
                    st.markdown(f"<p class='info-text'>{_escape(comparison.get('Impact', ''))}</p>", unsafe_allow_html=True)
                    st.markdown("<hr>", unsafe_allow_html=True)

            # ---- Optional raw JSON --------------------------------------
            if show_json:
                st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
                st.markdown("<h3 class='sub-header'>Example JSON Format</h3>", unsafe_allow_html=True)

                # generate_example_output() sends its own request to the
                # analysis endpoint, so this option runs the analysis a
                # second time.
                json_output = generate_example_output(company_name)

                st.code(json_output, language="json")

        except requests.exceptions.HTTPError as http_err:
            status_code = http_err.response.status_code
            if status_code == 404:
                st.error(f"No news articles found for {company_name}. Please try another company name.")
            elif status_code == 500:
                error_detail = "Unknown server error"
                try:
                    error_data = http_err.response.json()
                except ValueError:
                    # Error body is not JSON; keep the generic message.
                    error_data = None
                if isinstance(error_data, dict) and "detail" in error_data:
                    error_detail = error_data["detail"]
                st.error(f"Server error: {error_detail}")
            else:
                st.error(f"HTTP error occurred: {http_err}")
        except requests.exceptions.ConnectionError:
            st.error("Failed to connect to the server. Please make sure the API is running.")
        except requests.exceptions.Timeout:
            st.error("Request timed out. The analysis might be taking too long to complete.")
        except Exception as e:
            # Last-resort boundary so an unexpected payload shows a message
            # instead of a stack trace.
            st.error(f"An error occurred: {str(e)}")
elif analyze_button and not api_running:
    st.error("Cannot perform analysis because the API server is not running. Please check the logs.")
else:
    st.info("Enter a company name and click 'Analyze Company News' to get started.")

    with st.expander("See Example Analysis"):
        st.write("""
        This application will provide:

        1. Sentiment analysis of news articles about the company
        2. Key topics mentioned in the articles
        3. Comparative analysis of different articles
        4. Hindi audio summary of the findings

        Companies you can try: Apple, Microsoft, Google, Amazon, Tesla, etc.
        """)