saquib34 committed on
Commit
ea4f4ec
·
2 Parent(s): 1a5947fb6a4c49

Add Docker configuration

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Python 3",
3
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5
+ "customizations": {
6
+ "codespaces": {
7
+ "openFiles": [
8
+ "README.md",
9
+ "streamlit_app.py"
10
+ ]
11
+ },
12
+ "vscode": {
13
+ "settings": {},
14
+ "extensions": [
15
+ "ms-python.python",
16
+ "ms-python.vscode-pylance"
17
+ ]
18
+ }
19
+ },
20
+ "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21
+ "postAttachCommand": {
22
+ "server": "streamlit run streamlit_app.py --server.enableCORS false --server.enableXsrfProtection false"
23
+ },
24
+ "portsAttributes": {
25
+ "8501": {
26
+ "label": "Application",
27
+ "onAutoForward": "openPreview"
28
+ }
29
+ },
30
+ "forwardPorts": [
31
+ 8501
32
+ ]
33
+ }
requirements.txt CHANGED
@@ -10,4 +10,5 @@ sentence-transformers>=2.1.0
10
  deep-translator>=1.8.0
11
  python-multipart>=0.0.5
12
  gradio>=3.50.0
13
- python-dotenv==0.19.0
 
 
10
  deep-translator>=1.8.0
11
  python-multipart>=0.0.5
12
  gradio>=3.50.0
13
+ python-dotenv==0.19.0
14
+ tenacity>=8.2.3
streamlit_app.py CHANGED
@@ -1,58 +1,321 @@
1
  import streamlit as st
2
  import requests
 
 
 
 
 
 
 
3
 
4
- # Hugging Face Space API Endpoint
5
  HF_API_URL = "https://saquib34-news-analyzer.hf.space/api/analyze"
 
 
 
6
 
 
 
 
 
7
  def analyze_company(company_name):
8
- """Call Hugging Face API endpoint"""
9
  try:
10
  response = requests.post(
11
  HF_API_URL,
12
- json={"name": company_name},
13
- headers={"Content-Type": "application/json"}
 
14
  )
15
  response.raise_for_status()
16
  return response.json()
 
 
 
17
  except requests.exceptions.RequestException as e:
18
- st.error(f"API Error: {str(e)}")
 
 
 
19
  return None
20
 
21
- # Streamlit UI
22
- st.title("📰 Company News Analyzer")
 
 
 
 
 
 
 
23
 
24
- # Input Section
25
- company = st.text_input("Enter Company Name", placeholder="e.g., Microsoft, Apple...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- if st.button("Analyze"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  if not company:
29
  st.warning("Please enter a company name")
30
  else:
31
- with st.spinner("Analyzing news coverage..."):
32
- result = analyze_company(company)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- if result:
35
- # Display Results
36
- st.subheader(result["Final Sentiment Analysis"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Sentiment Summary
39
  col1, col2 = st.columns(2)
40
  with col1:
41
- st.metric("Positive Articles", result["ComparativeSentimentScore"]["POSITIVE"])
42
- st.metric("Negative Articles", result["ComparativeSentimentScore"]["NEGATIVE"])
 
 
 
 
 
 
 
 
 
 
 
 
43
  with col2:
44
- st.metric("Neutral Articles", result["ComparativeSentimentScore"]["NEUTRAL"])
45
-
46
- # Articles Display
47
- st.subheader("News Articles Analysis")
48
- for idx, article in enumerate(result["Articles"], 1):
49
- with st.expander(f"Article {idx}: {article['Title']}"):
50
- st.write(f"**Summary**: {article['Summary']}")
51
- st.write(f"**Sentiment**: {article['Sentiment']}")
52
- st.write(f"**URL**: [Read Full Article]({article['URL']})")
53
- st.write(f"**Key Topics**: {', '.join(article['Topics'])}")
54
-
55
- # Audio Section
56
- if result["Audio"]:
57
- st.subheader("Hindi Summary Audio")
58
- st.audio(result["Audio"], format="audio/wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import requests
3
+ import base64
4
+ import time
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ from wordcloud import WordCloud
8
+ from io import BytesIO
9
+ from tenacity import retry, stop_after_attempt, wait_exponential
10
 
11
+ # Configuration
12
  HF_API_URL = "https://saquib34-news-analyzer.hf.space/api/analyze"
13
+ SAMPLE_COMPANIES = ["Microsoft", "Apple", "Google", "Amazon", "Tesla", "Meta"]
14
+ REQUEST_TIMEOUT = 120 # 2 minutes
15
+ CACHE_TTL = 1800 # 30 minutes
16
 
17
+ # Helper functions
18
@retry(stop=stop_after_attempt(3),
       wait=wait_exponential(multiplier=1, min=4, max=15),
       reraise=True)
@st.cache_data(ttl=CACHE_TTL, show_spinner=False)
def _fetch_analysis(company_name):
    """POST the company name to the backend and return the decoded JSON body.

    Failures are raised rather than returned so that the retry wrapper can
    act on them and so that a failed call is never stored in the cache.

    Raises:
        requests.exceptions.RequestException: network, timeout or HTTP error.
        ValueError: the response body is not valid JSON.
    """
    response = requests.post(
        HF_API_URL,
        json={"name": company_name},
        headers={"Content-Type": "application/json"},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    try:
        return response.json()
    except ValueError as exc:
        # Every JSON decode error (json, simplejson, requests) is a
        # ValueError; re-raise a plain one so the caller can tell a bad body
        # apart from a transport failure.
        raise ValueError("Backend returned a non-JSON body") from exc


def analyze_company(company_name):
    """Call the Hugging Face API and report failures in the Streamlit UI.

    Args:
        company_name: Name of the company to analyse; surrounding whitespace
            is stripped before the request is made.

    Returns:
        The decoded JSON response, or ``None`` when the request failed after
        all retry attempts (an error message is shown to the user).
    """
    try:
        return _fetch_analysis(company_name.strip())
    except requests.exceptions.Timeout:
        st.error("Backend response timed out. Please try again later.")
    except requests.exceptions.RequestException as e:
        st.error(f"API Connection Error: {str(e)}")
    except ValueError:
        st.error("Invalid response from backend")
    return None
41
 
42
def validate_audio(audio_base64, min_length=100):
    """Decode a base64 audio payload, returning ``None`` when it is unusable.

    Args:
        audio_base64: Base64 text (or bytes) received from the backend.
        min_length: Payloads shorter than this many characters are treated
            as "no audio" and rejected without decoding.

    Returns:
        The decoded bytes, or ``None`` if the payload is empty, too short or
        cannot be decoded (in the last case an error is shown in the UI).
    """
    try:
        if not audio_base64 or len(audio_base64) < min_length:
            return None
        return base64.b64decode(audio_base64)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding) is a ValueError; TypeError covers
        # payloads that are not str/bytes-like.
        st.error(f"Audio decoding error: {str(e)}")
        return None
51
 
52
def create_sentiment_chart(data):
    """Build a pie chart of sentiment counts.

    Args:
        data: Mapping of sentiment label to article count.

    Returns:
        A matplotlib figure, or ``None`` when there is nothing to plot (no
        data, or no strictly positive count) or when plotting failed (an
        error is then shown in the UI).
    """
    try:
        if not data:
            return None

        # Zero-sized wedges only add overlapping "0.0%" labels and negative
        # values make pie() raise, so keep strictly positive counts only.
        counts = {label: count for label, count in data.items() if count > 0}
        if not counts:
            return None

        # Colours are looked up by label so the mapping does not depend on
        # the order in which the backend happens to emit the keys.
        palette = {
            'POSITIVE': '#4CAF50',
            'NEGATIVE': '#F44336',
            'NEUTRAL': '#9E9E9E',
        }
        fallback_color = '#2196F3'
        labels = list(counts)
        colors = [palette.get(str(label).upper(), fallback_color) for label in labels]

        fig, ax = plt.subplots()
        ax.pie(list(counts.values()), labels=labels, colors=colors, autopct='%1.1f%%',
               startangle=90, wedgeprops={'edgecolor': 'white'})
        ax.axis('equal')
        return fig
    except Exception as e:
        # Rendering boundary: a malformed payload must not take the page down.
        st.error(f"Chart error: {str(e)}")
        return None
70
 
71
def safe_dataframe(data_dict, default_index=None):
    """Create a DataFrame without letting malformed input raise.

    Args:
        data_dict: Either a mapping of column name to a sequence of values,
            or a sequence of records (for example a list of dicts).
        default_index: Optional index passed through to ``pd.DataFrame``.

    Returns:
        The constructed DataFrame, or an empty DataFrame when the input is
        empty, its columns differ in length, or construction fails (a
        warning or error is then shown in the UI).
    """
    try:
        if not data_dict:
            return pd.DataFrame()

        # A list of records has no per-column lengths to compare; pandas
        # aligns the keys itself.
        if not isinstance(data_dict, dict):
            return pd.DataFrame(data_dict, index=default_index)

        column_lengths = {len(column) for column in data_dict.values()}
        if len(column_lengths) > 1:
            st.warning("Data inconsistency detected in results")
            return pd.DataFrame()

        return pd.DataFrame(data_dict, index=default_index)
    except Exception as e:
        st.error(f"Data processing error: {str(e)}")
        return pd.DataFrame()
86
+
87
+ # Streamlit UI Configuration
88
# Wide layout with the sidebar open by default.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="News Analyzer Pro",
    page_icon="📊",
)

# Seed the session keys used further down, without overwriting values that
# survived from an earlier run of the script.
for state_key, initial_value in (("result", None), ("progress", 0)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
100
+
101
+ # Sidebar Controls
102
# Sidebar: company selection, display settings and a small status panel.
with st.sidebar:
    st.title("Control Panel")
    st.markdown("---")

    selected_company = st.selectbox(
        "Select Company",
        SAMPLE_COMPANIES,
        index=0,
        help="Choose from popular companies or enter a custom name below"
    )

    custom_company = st.text_input(
        "Custom Company Name",
        placeholder="e.g., Netflix, Samsung...",
        max_chars=50
    )

    # A custom name takes precedence over the dropdown. A blank or
    # whitespace-only entry falls back to the dropdown instead of producing
    # an empty company name.
    company = (custom_company or "").strip() or selected_company

    st.markdown("---")
    st.markdown("### Settings")
    auto_play = st.checkbox("Auto-Play Audio", True)
    show_raw = st.checkbox("Show Raw Data", False)

    st.markdown("---")
    st.markdown("### System Monitor")
    st.metric("API Timeout", f"{REQUEST_TIMEOUT}s")
    # "Active" means a result is currently held in session state.
    st.metric("Cache Status", "Active" if st.session_state.result else "Inactive")
    st.progress(st.session_state.progress)
131
+
132
+ # Main Interface
133
# Page heading followed by a short tagline and disclaimer.
st.title("📈 Advanced News Sentiment Analyzer")
intro_text = """
*Comprehensive news analysis with AI-powered insights and audio summaries*
**Note**: Results based on latest articles from trusted sources
"""
st.markdown(intro_text)
138
+
139
+ # Analysis Execution
140
# Run the analysis when the button is pressed and keep the outcome in session
# state so it survives the reruns triggered by other widgets.
if st.button("Start Analysis", type="primary", key="analyze_btn"):
    if not company:
        st.warning("Please enter a company name")
    else:
        st.session_state.progress = 0
        progress_bar = st.progress(0)
        status_text = st.empty()

        try:
            status_text.text(f"Progress: {st.session_state.progress}%")
            # The spinner covers the real request. No artificial delay is
            # added, so cached results come back immediately.
            with st.spinner("Initializing analysis pipeline..."):
                st.session_state.result = analyze_company(company)

            st.session_state.progress = 100
            progress_bar.progress(st.session_state.progress)
            status_text.text(f"Progress: {st.session_state.progress}%")

        except Exception as e:
            st.error(f"Analysis failed: {str(e)}")
            st.session_state.result = None
        finally:
            # Remove the transient widgets whatever the outcome.
            progress_bar.empty()
            status_text.empty()
164
+
165
+ # Results Display
166
# Render the stored analysis result: headline widgets first, then tabs.
if st.session_state.result:
    result = st.session_state.result

    try:
        # Header Section
        analysed_company = result.get('Company', 'Unknown')
        st.success(f"Analysis Complete for **{analysed_company}**")

        # Main Metrics
        with st.container():
            chart_col, cloud_col, audio_col = st.columns([2, 3, 2])

            with chart_col:
                st.subheader("Sentiment Distribution")
                chart = create_sentiment_chart(result.get('ComparativeSentimentScore', {}))
                if chart:
                    st.pyplot(chart)
                else:
                    st.warning("No sentiment data available")

            with cloud_col:
                st.subheader("Key Topics Cloud")
                try:
                    text = ' '.join(', '.join(article.get('Topics', []))
                                    for article in result.get('Articles', []))
                    # Articles without topics still contribute separators,
                    # so test the stripped text; WordCloud rejects input
                    # that contains no words.
                    if text.strip():
                        wordcloud = WordCloud(width=800, height=400,
                                              background_color='white').generate(text)
                        st.image(wordcloud.to_array(), use_column_width=True)
                    else:
                        st.warning("No topics identified")
                except Exception as e:
                    st.error(f"Word cloud error: {str(e)}")

            with audio_col:
                st.subheader("Audio Summary")
                audio_bytes = validate_audio(result.get("Audio", ""))

                if audio_bytes:
                    # NOTE(review): the gTTS-based generator in utils/utils.py
                    # emits MP3 data, hence the MP3 MIME type and extension --
                    # confirm that it is the backend actually deployed.
                    st.audio(audio_bytes, format="audio/mpeg")
                    st.download_button(
                        label="Download Audio",
                        data=audio_bytes,
                        # Name the file after the analysed company; the
                        # sidebar selection may have changed since the run.
                        file_name=f"{result.get('Company') or company}_summary.mp3",
                        mime="audio/mpeg"
                    )
                else:
                    st.warning("Audio summary unavailable")

        # Detailed Analysis Tabs
        articles_tab, comparison_tab, technical_tab = st.tabs(
            ["Articles", "Comparative Data", "Technical Info"])

        with articles_tab:
            st.subheader("Article Breakdown")
            articles = result.get('Articles', [])
            if articles:
                for idx, article in enumerate(articles, 1):
                    with st.expander(f"📰 Article {idx}: {article.get('Title', 'Untitled')}",
                                     expanded=False):
                        body_col, meta_col = st.columns([3, 1])
                        with body_col:
                            st.markdown(f"**Summary**: {article.get('Summary', 'No summary available')}")
                            if article.get('URL'):
                                st.markdown(f"**Source**: [Read Article]({article['URL']})")
                        with meta_col:
                            st.markdown(f"**Sentiment**: {article.get('Sentiment', 'N/A')}")
                            if article.get('Topics'):
                                st.markdown("**Key Topics**:")
                                for topic in article['Topics']:
                                    st.markdown(f"- {topic}")
            else:
                st.warning("No articles found for analysis")

        with comparison_tab:
            st.subheader("Comparative Analysis")

            overlap_col, coverage_col = st.columns(2)
            with overlap_col:
                try:
                    st.markdown("### Topic Overlap")
                    common_topics = result.get('Topic Overlap', {}).get('Common Topics', [])
                    if common_topics:
                        df = safe_dataframe({
                            "Common Topics": common_topics,
                            "Frequency": [1]*len(common_topics)
                        })
                        st.dataframe(df, use_container_width=True)
                    else:
                        st.warning("No common topics identified")
                except Exception as e:
                    st.error(f"Topic display error: {str(e)}")

            with coverage_col:
                try:
                    st.markdown("### Coverage Differences")
                    coverage_data = result.get('Coverage Differences', [])
                    if coverage_data:
                        # Build the frame directly: the payload may be a list
                        # of records, which pandas handles natively.
                        df = pd.DataFrame(coverage_data)
                        # Only explode when the column exists, so another
                        # payload shape still renders.
                        if 'Unique Topics' in df.columns:
                            df = df.explode('Unique Topics')
                        st.dataframe(df, use_container_width=True, hide_index=True)
                    else:
                        st.warning("No coverage differences found")
                except Exception as e:
                    st.error(f"Coverage display error: {str(e)}")

        with technical_tab:
            st.subheader("Technical Details")

            with st.expander("System Metrics"):
                # Static, hard-coded descriptive table (not live measurements).
                tech_data = {
                    "Component": ["Backend API", "Sentiment Model", "Summary Model"],
                    "Version": ["1.2.0", "distilbert-base", "distilbart-cnn-12-6"],
                    "Status": ["Operational", "Loaded", "Loaded"]
                }
                st.dataframe(safe_dataframe(tech_data), hide_index=True)

            if show_raw and result:
                with st.expander("Raw API Response"):
                    st.json(result)

    except Exception as e:
        # Last-resort boundary so a malformed payload shows guidance plus the
        # traceback instead of a blank page.
        st.error("""
## Critical Error Rendering Results
Please try:
1. Refreshing the page
2. Simplifying your search terms
3. Contacting support if issue persists
""")
        st.exception(e)
294
+
295
+ # Footer & Documentation
296
# Horizontal rule, then a collapsible user guide with support links.
st.markdown("---")
user_guide = """
## User Guide

### Basic Usage
1. Select a company from the dropdown
2. Click 'Start Analysis'
3. Explore results in different tabs

### Features
- **Sentiment Distribution**: Pie chart of article sentiments
- **Topic Cloud**: Visual representation of common keywords
- **Audio Summary**: Hindi translation of key insights
- **Technical Details**: System metrics and raw data

### Support
**Contact:** support@newsanalyzer.com
**API Docs:** [API Documentation](https://api.newsanalyzer.com)
**Service Status:** [Status Page](https://status.newsanalyzer.com)
"""
with st.expander("Documentation & Support"):
    st.markdown(user_guide)
317
+
318
+ # Cache Management
319
# Reset button: drop the cached API responses as well as the session state,
# so the next analysis really goes back to the backend.
if st.sidebar.button("Clear Cache & Reset"):
    st.cache_data.clear()
    st.session_state.clear()
    st.rerun()
utils/utils.py CHANGED
@@ -87,12 +87,25 @@ def perform_comparative_analysis(articles):
87
 
88
  return sentiment_counts, coverage_differences, topic_overlap
89
 
 
90
  def generate_hindi_tts(text: str) -> str:
91
- text = text[:1000]
92
- text = GoogleTranslator(source='en', target='hi').translate(text)
93
- tts = gTTS(text, lang='hi')
94
- audio_buffer = BytesIO()
95
- tts.save(audio_buffer)
96
- audio_buffer.seek(0)
97
-
98
- return base64.b64encode(audio_buffer.read()).decode('utf-8')
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  return sentiment_counts, coverage_differences, topic_overlap
89
 
90
def generate_hindi_tts(text: str) -> str:
    """Translate English text to Hindi and synthesise it as speech.

    Args:
        text: English source text. Only the first 1000 characters are used,
            and inputs shorter than 50 characters are rejected.

    Returns:
        The synthesised audio as a base64-encoded string, or ``""`` when the
        input is missing or too short, the translation is empty, or any step
        fails (the error is printed).
    """
    try:
        if not text or len(text) < 50:
            return ""

        translated = GoogleTranslator(source='en', target='hi').translate(text[:1000])
        if not translated:
            return ""

        audio_buffer = BytesIO()
        # gTTS.save() expects a file *path*; write_to_fp() is the call that
        # streams the audio into a file-like object such as BytesIO.
        gTTS(translated, lang='hi').write_to_fp(audio_buffer)

        return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')

    except Exception as e:
        # Best effort: the caller treats "" as "no audio available".
        print(f"TTS Error: {str(e)}")
        return ""