Jman666 committed on
Commit
b09895f
·
verified ·
1 Parent(s): 028f10f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -388
app.py CHANGED
@@ -1,404 +1,284 @@
1
- import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
2
  import requests
3
- import json
4
- import os
5
- import base64
6
- from typing import Dict, Any, List
7
- import time
8
-
9
# Base URL of the news-analysis API.
# The lookup key must be the *name* of an environment variable; the original
# passed the deployed URL itself as the name, so the value could never be
# overridden and always fell back to localhost. Set API_BASE_URL in the
# environment (e.g. to the hosted API's URL) to point at another backend.
API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:8000")

# Page-level Streamlit configuration (title, icon, wide layout, open sidebar).
st.set_page_config(
    page_title="News Sentiment Analysis",
    page_icon="📰",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS classes used by the badge, topic-tag and article-card helpers.
st.markdown("""
<style>
    .main {
        padding: 2rem;
    }
    .sentiment-positive {
        color: green;
        font-weight: bold;
    }
    .sentiment-negative {
        color: red;
        font-weight: bold;
    }
    .sentiment-neutral {
        color: gray;
        font-weight: bold;
    }
    .article-card {
        padding: 1rem;
        border-radius: 5px;
        margin-bottom: 1rem;
        background-color: #f5f5f5;
    }
    .topic-tag {
        display: inline-block;
        padding: 0.25rem 0.5rem;
        margin-right: 0.5rem;
        margin-bottom: 0.5rem;
        border-radius: 15px;
        font-size: 0.8rem;
        background-color: #e1e1e1;
    }
    .header-row {
        display: flex;
        justify-content: space-between;
        align-items: center;
        margin-bottom: 1rem;
    }
</style>
""", unsafe_allow_html=True)
61
 
62
def fetch_company_news(company_name: str) -> Dict[str, Any]:
    """
    Fetch news analysis for a given company from the API.

    Args:
        company_name: Name of the company

    Returns:
        Dictionary containing processed news data. Returns None when the API
        answers with a non-200 status (an error is shown in the UI), and the
        sample data from get_sample_data() when the API cannot be reached or
        its response body is not valid JSON.
    """
    try:
        response = requests.post(
            f"{API_BASE_URL}/api/news",
            json={"company_name": company_name},
            timeout=30
        )

        if response.status_code == 200:
            return response.json()

        st.error(f"Error: {response.status_code} - {response.text}")
        return None
    except (requests.RequestException, ValueError) as e:
        # Only transport failures and undecodable bodies trigger the fallback;
        # programming errors are no longer disguised as connectivity problems.
        st.error(f"Error connecting to API: {str(e)}")
        # For demo/testing, provide fallback sample data
        return get_sample_data(company_name)
 
 
88
 
89
def get_sample_data(company_name: str) -> Dict[str, Any]:
    """
    Generate sample data for demonstration when the API is not available.

    Args:
        company_name: Name of the company; interpolated into titles,
            summaries and (lower-cased) into the example URLs.

    Returns:
        Sample data dictionary with the keys "Company", "Articles",
        "Comparative_Sentiment_Score", "Final_Sentiment_Analysis" and "Audio".
        The sentiment distribution is counted from the article list so the
        chart always agrees with the articles shown.
    """
    url_base = f"https://example.com/{company_name.lower()}"

    def neutral_article() -> Dict[str, Any]:
        # A fresh dict (and Topics list) per call so entries never alias.
        return {
            "Title": f"{company_name} Announces Changes to Leadership Team",
            "Summary": f"{company_name} has announced changes that could impact its operations in the coming months.",
            "Sentiment": "Neutral",
            "Topics": ["Leadership", "Corporate Governance", "Organization Structure"],
            "Source": "Market Watch",
            "Published_Date": "2025-03-05",
            "URL": f"{url_base}/3",
        }

    articles = [
        {
            "Title": f"{company_name} Reports Strong Quarterly Growth",
            "Summary": f"{company_name} has reported exceptional performance in recent quarters, exceeding analyst expectations.",
            "Sentiment": "Positive",
            "Topics": ["Financial Performance", "Market Growth", "Investor Relations"],
            "Source": "Business News",
            "Published_Date": "2025-03-15",
            "URL": f"{url_base}/1",
        },
        {
            "Title": f"{company_name} Faces Regulatory Scrutiny",
            "Summary": f"Regulatory concerns continue to impact {company_name}'s operations and strategic plans.",
            "Sentiment": "Negative",
            "Topics": ["Regulations", "Compliance", "Legal Issues"],
            "Source": "Financial Times",
            "Published_Date": "2025-03-10",
            "URL": f"{url_base}/2",
        },
    ]
    # The sample set contains the same neutral leadership story eight times.
    articles.extend(neutral_article() for _ in range(8))

    # Count sentiments from the articles instead of hard-coding 1/1/1, which
    # contradicted the ten articles actually returned.
    distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
    for article in articles:
        distribution[article["Sentiment"]] += 1

    final_sentiment = "Current news coverage is mixed or neutral, reflecting a complex situation."

    return {
        "Company": company_name,
        "Articles": articles,
        "Comparative_Sentiment_Score": {
            "Sentiment_Distribution": distribution,
            "Coverage_Differences": [
                {
                    "Comparison": "Positive articles focus on Financial Performance, Market Growth, while negative articles emphasize Regulations, Compliance, Legal Issues.",
                    "Impact": "This suggests a contrast in perception across different aspects of the company."
                },
                {
                    "Comparison": "Coverage varies in depth and focus across different sources.",
                    "Impact": "This highlights the importance of consulting multiple sources for a comprehensive understanding."
                }
            ],
            "Topic_Overlap": {
                "Common_Topics": ["Corporate Strategy", "Market Position"],
                "Unique_Topics": ["Financial Performance", "Regulations", "Leadership"]
            },
            "Final_Sentiment_Analysis": final_sentiment
        },
        "Final_Sentiment_Analysis": final_sentiment,
        "Audio": "sample_audio.mp3"
    }
218
 
219
def display_sentiment_badge(sentiment: str) -> None:
    """Render the sentiment as a coloured label.

    "Positive" and "Negative" get their own CSS class; every other value is
    shown as "Neutral".
    """
    badges = {
        "Positive": '<span class="sentiment-positive">Positive</span>',
        "Negative": '<span class="sentiment-negative">Negative</span>',
    }
    neutral_badge = '<span class="sentiment-neutral">Neutral</span>'
    st.markdown(badges.get(sentiment, neutral_badge), unsafe_allow_html=True)
227
-
228
def display_topics(topics: List[str]) -> None:
    """Display topic tags.

    Args:
        topics: Topic labels to render, one "topic-tag" span each.

    The labels originate from API/article data and are rendered with
    unsafe_allow_html=True, so each one is HTML-escaped before being placed
    inside the markup.
    """
    from html import escape

    tags_html = "".join(
        f'<span class="topic-tag">{escape(str(topic))}</span>' for topic in topics
    )
    st.markdown(tags_html, unsafe_allow_html=True)
234
-
235
def display_article_card(article: Dict[str, Any], index: int) -> None:
    """Render one article: title, sentiment badge, summary, topics, source,
    publication date and a link to the full text.

    Args:
        article: Article dictionary; the keys "Title", "Sentiment", "Summary",
            "Topics", "Source", "Published_Date" and "URL" are read.
        index: Position of the article in the list (not used by this function).
    """
    with st.container():
        st.markdown('<div class="article-card">', unsafe_allow_html=True)

        # Header row: wide title column next to a narrow sentiment column.
        title_col, sentiment_col = st.columns([3, 1])
        with title_col:
            st.markdown(f"### {article['Title']}")
        with sentiment_col:
            st.markdown("**Sentiment:**")
            display_sentiment_badge(article['Sentiment'])

        st.markdown("**Summary:**")
        st.write(article['Summary'])

        st.markdown("**Topics:**")
        display_topics(article['Topics'])

        # Footer row: source on the left, publication date on the right.
        source_col, date_col = st.columns(2)
        with source_col:
            st.markdown(f"**Source:** {article['Source']}")
        with date_col:
            st.markdown(f"**Published:** {article['Published_Date']}")

        st.markdown(f"[Read full article]({article['URL']})")

        st.markdown('</div>', unsafe_allow_html=True)
267
-
268
def display_comparative_analysis(analysis: Dict[str, Any]) -> None:
    """Render the comparative analysis: sentiment chart, coverage
    observations, topic overlap and the overall verdict.

    Args:
        analysis: Dictionary with the keys "Sentiment_Distribution",
            "Coverage_Differences", "Topic_Overlap" and
            "Final_Sentiment_Analysis".
    """
    def bullet_list(heading: str, entries) -> None:
        # One bold heading followed by a markdown bullet per entry.
        st.markdown(heading)
        for entry in entries:
            st.markdown(f"- {entry}")

    # Sentiment distribution as a bar chart
    st.subheader("Sentiment Distribution")
    st.bar_chart(analysis["Sentiment_Distribution"])

    # Coverage differences, one observation/impact pair per entry
    st.subheader("Coverage Analysis")
    for difference in analysis["Coverage_Differences"]:
        st.markdown(f"**Observation:** {difference['Comparison']}")
        st.markdown(f"*Impact:* {difference['Impact']}")
        st.markdown("---")

    # Topic overlap, common topics left and unique topics right
    st.subheader("Topic Analysis")
    overlap = analysis["Topic_Overlap"]
    left, right = st.columns(2)
    with left:
        bullet_list("**Common Topics Across Articles:**", overlap["Common_Topics"])
    with right:
        bullet_list("**Unique Topics:**", overlap["Unique_Topics"])

    # Final sentiment
    st.subheader("Overall Sentiment Analysis")
    st.info(analysis["Final_Sentiment_Analysis"])
300
-
301
def main():
    """Build the Streamlit page: company picker, analysis tabs and sidebar.

    The analysis runs when "Analyze News" is clicked. The request is sent
    straight away (the earlier simulated five-second progress bar only delayed
    it), and a warning is shown if no company name has been entered.
    """
    st.title("📰 Company News Sentiment Analysis")
    st.markdown("""
    This application extracts key details from news articles related to a given company,
    performs sentiment analysis, conducts a comparative analysis, and generates a text-to-speech
    output in Hindi.
    """)

    # Company selection
    st.header("Enter Company Name")

    # Example companies for dropdown
    example_companies = [
        "Tesla",
        "Apple",
        "Google",
        "Microsoft",
        "Amazon",
        "Facebook",
        "Netflix",
        "Other (specify)"
    ]

    company_option = st.selectbox(
        "Select a company or choose 'Other' to specify:",
        example_companies
    )

    if company_option == "Other (specify)":
        company_name = st.text_input("Enter company name:")
    else:
        company_name = company_option
    # Whitespace-only input counts as "no company".
    company_name = (company_name or "").strip()

    # Process button
    if st.button("Analyze News"):
        if not company_name:
            st.warning("Please enter a company name.")
        else:
            # The spinner covers the real API call; no artificial delay.
            with st.spinner(f"Analyzing news for {company_name}..."):
                result = fetch_company_news(company_name)

            if result:
                # Display results
                st.header(f"News Analysis for {result['Company']}")

                # Summary tabs
                tab1, tab2, tab3 = st.tabs(["Articles", "Comparative Analysis", "Audio Summary"])

                with tab1:
                    st.subheader("Articles Analysis")
                    for i, article in enumerate(result["Articles"]):
                        display_article_card(article, i)

                with tab2:
                    st.subheader("Comparative Sentiment Analysis")
                    display_comparative_analysis(result["Comparative_Sentiment_Score"])

                with tab3:
                    st.subheader("Audio Summary (Hindi)")
                    st.markdown("Listen to the audio summary of the news analysis in Hindi:")

                    # In a real implementation, you would provide the actual audio file
                    # For demonstration, we'll show a placeholder
                    st.audio("https://upload.wikimedia.org/wikipedia/commons/5/5b/Hindi_svar.ogg", format="audio/ogg")

                    st.markdown("**Note:** This is a placeholder audio. In the actual implementation, the audio would be a Hindi text-to-speech conversion of the news summary.")

    # Information section
    st.sidebar.title("About")
    st.sidebar.info("""
    This application performs news extraction, sentiment analysis, and text-to-speech conversion
    for company news articles.

    **Features:**
    - Extract news from multiple sources
    - Analyze sentiment (positive, negative, neutral)
    - Identify key topics in articles
    - Compare sentiment across articles
    - Generate Hindi audio summary

    **Technologies Used:**
    - Natural Language Processing
    - Sentiment Analysis
    - Text-to-Speech Conversion
    - Web Scraping
    """)

    st.sidebar.title("Instructions")
    st.sidebar.markdown("""
    1. Select a company from the dropdown or enter a custom company name
    2. Click "Analyze News" to start the analysis
    3. View the results in the three tabs:
        - Articles: Individual article analysis
        - Comparative Analysis: Cross-article insights
        - Audio Summary: Hindi speech summary
    """)


if __name__ == "__main__":
    main()
 
1
# utils
#
# NOTE: "!pip install <package>" is an IPython/Jupyter shell escape, not
# Python. In a plain .py module such lines are a SyntaxError, so the file
# cannot be imported or run at all. The packages those lines installed are
# listed below; install them before running (for example from a
# requirements.txt) instead of from inside the module:
#
#   deep-translator
#   googletrans
#   tldextract
#   playsound
#   gtts
#   streamlit
#   fastapi
#   pandas
#   matplotlib
#   pydantic
#   requests
14
  import requests
15
+ from bs4 import BeautifulSoup
16
+ import pandas as pd
17
+ from typing import List, Dict, Any
18
+ import numpy as np
19
+ from transformers import pipeline, AutoProcessor, AutoModel
20
+ import urllib.parse
21
+ from sklearn.feature_extraction.text import TfidfVectorizer
22
+ import tldextract
23
+ import torch
24
+ import soundfile as sf
25
+ from googletrans import Translator
26
+ from playsound import playsound
27
+ from transformers import AutoModel, AutoTokenizer
28
+ import soundfile as sf
29
+ import numpy as np
30
+ from gtts import gTTS
31
+ from deep_translator import GoogleTranslator
32
def search_news(company_name: str, num_articles: int = 2) -> List[str]:
    """
    Search Google News for a company and collect article links.

    Args:
        company_name: Company to search for. It is URL-encoded, so names with
            spaces or characters such as "&" produce a valid query.
        num_articles: Maximum number of links to return; values <= 0 yield an
            empty list.

    Returns:
        Up to num_articles article URLs, or an empty list when the request or
        parsing fails (the error is printed).
    """
    if num_articles <= 0:
        return []

    query = urllib.parse.quote_plus(f"{company_name} news")
    search_url = f"https://www.google.com/search?q={query}&tbm=nws"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        article_links = []
        # '.SoaBEf' is the result-card selector this scraper relies on;
        # presumably tied to Google's current markup, so verify if results dry up.
        for article in soup.select('.SoaBEf'):
            link_element = article.select_one('a')
            if link_element is None or 'href' not in link_element.attrs:
                continue

            href = link_element['href']
            if href.startswith('/url?q='):
                # Redirect-style link: the real target is the "q" value.
                url = href.split('/url?q=')[1].split('&')[0]
                article_links.append(urllib.parse.unquote(url))
            elif href.startswith('http'):
                article_links.append(href)

            if len(article_links) >= num_articles:
                break

        return article_links
    except Exception as e:
        # Best effort: callers receive an empty list rather than an exception.
        print(f"Error fetching news articles: {e}")
        return []
62
+
63
def extract_article_content(url: str) -> Dict[str, Any]:
    """
    Download an article page and pull out its title, body text and date.

    Args:
        url: Address of the article.

    Returns:
        Dictionary with the keys 'url', 'title', 'content' and 'date'.
        On any failure the title and content are both
        "Error extracting content" and the date is None.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

    try:
        # A timeout keeps one unresponsive site from blocking the whole run.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Look the heading up once instead of searching the tree twice.
        heading = soup.find("h1")
        title = heading.get_text().strip() if heading else "No title found"

        # First match wins: <article>, then <main>, then a div with a known class.
        content_element = (
            soup.find("article")
            or soup.find("main")
            or soup.find("div", class_=["content", "article", "story"])
        )
        if content_element:
            content = " ".join(p.get_text().strip() for p in content_element.find_all("p"))
        else:
            content = "No content found"

        date_element = soup.find("time")
        date = date_element["datetime"] if date_element and "datetime" in date_element.attrs else None

        return {
            'url': url,
            'title': title,
            'content': content,
            'date': date
        }
    except Exception as e:
        # Best effort: a dictionary is returned for every URL.
        print(f"Error extracting content from {url}: {e}")
        return {
            'url': url,
            'title': "Error extracting content",
            'content': "Error extracting content",
            'date': None
        }
93
+
94
def get_company_news(company_name: str) -> List[Dict[str, Any]]:
    """
    Search for a company's news and extract each article found.

    Args:
        company_name: Company to search for.

    Returns:
        One extracted-article dictionary per URL, taken from at most the
        first ten search results.
    """
    collected: List[Dict[str, Any]] = []
    candidate_urls = search_news(company_name)[:10]

    for link in candidate_urls:
        try:
            collected.append(extract_article_content(link))
        except Exception as e:
            print(f"Error extracting from {link}: {e}")

    return collected
106
+
107
def summarize_article(content: str, max_length: int = 50) -> str:
    """
    Summarize article text with the default summarization pipeline.

    Args:
        content: Text to summarize.
        max_length: Maximum length passed to the summarizer.

    Returns:
        The generated summary, or "" when content is empty or whitespace-only.
    """
    if not content or not content.strip():
        return ""

    # Build the pipeline once and keep it on the function object; constructing
    # it loads the model, which is far too expensive to repeat per article.
    summarizer = getattr(summarize_article, "_summarizer", None)
    if summarizer is None:
        summarizer = pipeline("summarization")
        summarize_article._summarizer = summarizer

    # The model limit is counted in tokens, so let the tokenizer truncate
    # instead of slicing the string by that number of characters.
    summary = summarizer(
        content,
        max_length=max_length,
        min_length=0,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
116
+
117
def analyze_sentiment(text: str) -> Dict[str, Any]:
    """
    Analyze sentiment of the given text.

    Args:
        text: The text to analyze.

    Returns:
        Dictionary with 'sentiment' ("Positive", "Negative", "Neutral", or
        "Unknown" when the text is empty or the analysis fails) and 'score'
        (the classifier's confidence; 0.0 for "Unknown").
    """
    # Nothing to classify: report "Unknown" without loading the model.
    if not text or not text.strip():
        return {
            'sentiment': "Unknown",
            'score': 0.0
        }

    try:
        # Build the analyzer once and keep it on the function object;
        # re-creating it reloads the model on every call.
        sentiment_analyzer = getattr(analyze_sentiment, "_analyzer", None)
        if sentiment_analyzer is None:
            sentiment_analyzer = pipeline("sentiment-analysis", truncation=True)
            analyze_sentiment._analyzer = sentiment_analyzer

        # Cheap pre-trim by word count; the pipeline's own truncation
        # enforces the model's real token limit.
        max_token_limit = 512
        words = text.split()
        if len(words) > max_token_limit:
            text = ' '.join(words[:max_token_limit])

        # Perform sentiment analysis
        result = sentiment_analyzer(text)
        label = result[0]['label']
        score = result[0]['score']

        # Low-confidence predictions are reported as neutral.
        if 0.4 <= score <= 0.6:
            sentiment_category = "Neutral"
        elif label == "POSITIVE":
            sentiment_category = "Positive"
        else:
            sentiment_category = "Negative"

        return {
            'sentiment': sentiment_category,
            'score': score
        }
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return {
            'sentiment': "Unknown",
            'score': 0.0
        }
158
 
159
def extract_key_topics(text: str, num_topics: int = 5) -> List[str]:
    """
    Pick the highest-scoring TF-IDF terms of a text as its topics.

    Args:
        text: Text to analyze.
        num_topics: Maximum number of terms to return.

    Returns:
        Up to num_topics terms, highest score first. When the text has fewer
        than ten words, or no usable vocabulary is left after stop-word
        removal, the single entry "Not enough text to extract topics".
    """
    not_enough = ["Not enough text to extract topics"]
    if len(text.split()) < 10:
        return not_enough

    vectorizer = TfidfVectorizer(stop_words='english', max_features=100)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError:
        # Raised when the vocabulary is empty (e.g. only stop words); report
        # it the same way as too-short text instead of crashing.
        return not_enough

    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray()[0]
    sorted_indices = np.argsort(tfidf_scores)[::-1]

    return [feature_names[idx] for idx in sorted_indices[:num_topics]]
171
+
172
def perform_comparative_analysis(articles: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Aggregate sentiment and topic statistics over analysed articles.

    Args:
        articles: Article dictionaries; each must provide 'url', 'topics' and
            a 'sentiment' dictionary that has a 'sentiment' entry.

    Returns:
        Dictionary with 'sentiment_distribution' (counts per category),
        'common_topics' (up to ten (topic, count) pairs, most frequent first)
        and 'sentiment_by_source' (source name -> list of sentiments).
    """
    labels = [entry['sentiment']['sentiment'] for entry in articles]
    sentiment_counts = {
        category: labels.count(category)
        for category in ('Positive', 'Neutral', 'Negative')
    }

    # Count how often each topic occurs across all articles.
    topic_frequency = {}
    for entry in articles:
        for topic in entry['topics']:
            topic_frequency[topic] = topic_frequency.get(topic, 0) + 1

    ranked_topics = list(topic_frequency.items())
    ranked_topics.sort(key=lambda pair: pair[1], reverse=True)

    # Group each article's sentiment under the source derived from its URL.
    sentiment_by_source = {}
    for entry, label in zip(articles, labels):
        source = extract_source_from_url(entry['url'])
        sentiment_by_source.setdefault(source, []).append(label)

    return {
        'sentiment_distribution': sentiment_counts,
        'common_topics': ranked_topics[:10],
        'sentiment_by_source': sentiment_by_source
    }
198
+
199
def extract_source_from_url(url: str) -> str:
    """Return the domain part of a URL as reported by tldextract."""
    return tldextract.extract(url).domain
202
+
203
+ from typing import List, Dict, Any
204
+ from transformers import pipeline
205
+
206
def get_combined_summary(articles, max_length: int = 100) -> str:
    """
    Generate a combined summary from multiple news articles.

    Args:
        articles: List of article dictionaries; 'title' and 'content' are
            read, with placeholders substituted when a key is missing.
        max_length: Maximum length of the final summary

    Returns:
        A summary of the concatenated articles, or "" when there is nothing
        to summarize.
    """
    # Combine all article contents with titles as context
    combined_content = "".join(
        f"Article: {article.get('title', 'No Title')}\n"
        f"{article.get('content', 'Content not available')}\n\n"
        for article in articles
    )
    if not combined_content.strip():
        return ""

    # Build the pipeline once and keep it on the function object; constructing
    # it loads the model, which is too expensive to repeat per call.
    summarizer = getattr(get_combined_summary, "_summarizer", None)
    if summarizer is None:
        summarizer = pipeline("summarization")
        get_combined_summary._summarizer = summarizer

    # The model limit is counted in tokens, so let the tokenizer truncate
    # rather than slicing the text by that number of characters. min_length is
    # capped so it can never exceed a small max_length.
    summary = summarizer(
        combined_content,
        max_length=max_length,
        min_length=min(30, max_length),
        do_sample=False,
        truncation=True,
    )

    # Handle different return formats from the pipeline
    if isinstance(summary, list):
        return summary[0]['summary_text']
    return summary['summary_text']
240
+
241
def generate_hindi_summary(combined_summary: str) -> str:
    """
    Translate the combined summary to Hindi using deep-translator.

    Args:
        combined_summary: The English combined summary

    Returns:
        The Hindi translation, or the string "Translation failed" when the
        translation raises an error (the error is printed).
    """
    try:
        return GoogleTranslator(source='auto', target='hi').translate(text=combined_summary)
    except Exception as e:
        print(f"Error in translation: {e}")
        return "Translation failed"
258
def generate_hindi_speech(hindi_summary: str):
    """
    Convert a Hindi summary to speech with the ai4bharat/vits_rasa_13 model,
    write it to "temp_hindi_speech.wav" and play it.

    Args:
        hindi_summary: Hindi text to synthesize. (The original note mentions a
            500-character maximum; no limit is enforced here.)

    Any failure in loading, synthesis or playback is printed, not raised.
    """
    try:
        # Use the GPU when present, otherwise fall back to CPU so hosts
        # without CUDA do not fail on the hard-coded device.
        device = "cuda" if torch.cuda.is_available() else "cpu"

        model = AutoModel.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True)

        # Process text and generate speech
        inputs = tokenizer(text=hindi_summary, return_tensors="pt").to(device)

        # Use default Indian voice profile (speaker_id=16 for male, 17 for female)
        # Inference only, so gradients are not needed.
        with torch.no_grad():
            outputs = model(inputs['input_ids'], speaker_id=16, emotion_id=0)

        # Convert to numpy array and save as temporary file
        audio_data = outputs.waveform.squeeze().cpu().numpy()
        sf.write("temp_hindi_speech.wav", audio_data, model.config.sampling_rate)

        # Play the audio using playsound
        playsound("temp_hindi_speech.wav")

    except Exception as e:
        print(f"Error in speech generation or playback: {e}")