gauthamnairy committed on
Commit
a0f5062
·
verified ·
1 Parent(s): da62bc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -63
app.py CHANGED
@@ -23,6 +23,11 @@ import plotly
23
  from newsapi import NewsApiClient
24
  import certifi
25
  import requests
 
 
 
 
 
26
 
27
  app = Flask(__name__)
28
  CORS(app)
@@ -50,6 +55,49 @@ ALLOWED_EXTENSIONS = {'txt', 'pdf', 'docx', 'xlsx', 'csv'}
50
  files_storage = {}
51
  chunks_storage = []
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def allowed_file(filename):
54
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
55
 
@@ -120,7 +168,6 @@ def get_conversational_chain():
120
  Answer the question as detailed as possible from the provided context. If the answer is not directly
121
  available in the provided context, use your knowledge to infer a reasonable answer based on the given information.
122
  If you're unsure or the question is completely unrelated to the context, state that you don't have enough information to answer accurately.
123
-
124
  Context:\n{context}\n
125
  Question:\n{question}\n
126
  Answer:
@@ -167,9 +214,7 @@ def process_query(query, role=None, file_id=None):
167
 
168
  prompt = f'''
169
  {system_prompt}
170
-
171
  Query: "{query}"
172
-
173
  Requirements:
174
  - Use a friendly yet professional tone.
175
  - Ensure the response is accurate and directly addresses the question.
@@ -186,68 +231,100 @@ def process_query(query, role=None, file_id=None):
186
 
187
  return generated_text
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def get_energy_news(query):
190
  try:
191
- articles = newsapi.get_everything(q=query, language='en', sort_by='publishedAt', page_size=10)
192
  return articles['articles']
193
  except Exception as e:
194
  logging.error(f"Error fetching news: {e}")
195
  return []
196
 
197
- def summarize_article(article):
198
- title = article.get('title', 'No title')
199
- content = article.get('description', '') or article.get('content', '') or ''
200
- prompt = f"""
201
- Summarize the following news article in 3-4 lines:
202
 
 
 
 
 
203
  Title: {title}
204
  Content: {content}
 
 
 
 
 
 
 
 
205
  """
 
206
  try:
207
  response = model.generate_content(prompt)
208
- return response.text.strip()
 
 
 
 
 
 
 
 
 
 
209
  except Exception as e:
210
- logging.error(f"Error summarizing article: {e}")
211
- return "Unable to generate summary."
212
 
213
- def filter_and_analyze_news(query, articles):
 
214
  filtered_and_analyzed_news = []
215
 
216
- for article in articles:
217
- title = article.get('title', 'No title')
218
- content = article.get('description', '') or article.get('content', '') or ''
219
-
220
- prompt = f"""
221
- Analyze the following news article in the context of the energy market:
222
-
223
- Query: {query}
224
- Title: {title}
225
- Content: {content}
226
-
227
- Is this article directly relevant to "{query}" in the context of the energy market?
228
- Answer ONLY 'YES' or 'NO', followed by a brief explanation.
229
-
230
- If YES, provide:
231
- 1. A concise 2-3 sentence summary of the news.
232
- 2. Key points (up to 3 bullet points).
233
- 3. Specific impact on the energy market related to {query} (1-2 sentences).
234
- """
235
-
236
- try:
237
- response = model.generate_content(prompt)
238
- analysis = response.text.strip()
239
-
240
- if analysis.startswith("YES"):
241
- filtered_and_analyzed_news.append({
242
- 'title': title,
243
- 'link': article.get('url', '#'),
244
- 'analysis': analysis.split("YES", 1)[1].strip()
245
- })
246
-
247
- if len(filtered_and_analyzed_news) >= 10:
248
  break
249
- except Exception as e:
250
- logging.error(f"Error analyzing article: {e}")
251
 
252
  return filtered_and_analyzed_news
253
 
@@ -255,20 +332,23 @@ def generate_market_summary(query, filtered_news):
255
  if not filtered_news:
256
  return f"No relevant news found for '{query}' in the energy market context."
257
 
 
 
 
258
  summaries = [item.get('analysis', '') for item in filtered_news]
259
  combined_summary = "\n\n".join(summaries)
260
 
261
  prompt = f"""
262
- Based on the following summaries of recent news articles related to '{query}' in the energy market:
263
-
264
  {combined_summary}
265
-
266
- Provide a concise market summary that:
267
  1. Highlights the current trends and developments related to {query} in the energy market.
268
  2. Identifies any significant impacts or potential changes in the market.
269
  3. Mentions any notable events or decisions affecting this area.
270
-
271
- Keep the summary focused on factual information derived from the news articles, without adding speculation or personal opinions.
 
 
272
  """
273
 
274
  try:
@@ -289,20 +369,29 @@ def query():
289
  role = data.get('role')
290
  file_id = data.get('file_id')
291
  news_context = data.get('newsContext')
292
- try:
293
- logging.info(f"Received query: {query}, role: {role}, file_id: {file_id}")
294
 
295
- if role == 'AI News Analyst' and news_context:
296
- # Handle news-related queries with context
 
297
  prompt = f"""
298
- As an AI News Analyst specializing in the energy market, answer the following question based on the provided news context:
299
 
300
  News Context:
301
- {json.dumps(news_context, indent=2)}
 
 
 
 
 
302
 
303
- Question: {query}
 
 
 
 
 
304
 
305
- Provide a concise and informative response, using the provided news context to support your answer.
306
  """
307
  response = model.generate_content(prompt)
308
  return jsonify({'response': response.text})
@@ -412,7 +501,8 @@ def fetch_news():
412
  query = data.get('query')
413
  try:
414
  all_articles = get_energy_news(query)
415
- filtered_news = filter_and_analyze_news(query, all_articles)
 
416
  market_summary = generate_market_summary(query, filtered_news)
417
 
418
  # Prepare the top 10 articles with summaries
@@ -422,7 +512,9 @@ def fetch_news():
422
  top_articles.append({
423
  'title': article.get('title', 'No title'),
424
  'url': article.get('link', '#'),
425
- 'summary': summary
 
 
426
  })
427
 
428
  return jsonify({
 
23
  from newsapi import NewsApiClient
24
  import certifi
25
  import requests
26
+ from bs4 import BeautifulSoup
27
+ from concurrent.futures import ThreadPoolExecutor, as_completed
28
+ from urllib.parse import urlparse, urljoin
29
+ import time
30
+ import random
31
 
32
  app = Flask(__name__)
33
  CORS(app)
 
55
  files_storage = {}
56
  chunks_storage = []
57
 
58
# Energy company websites to scrape for company announcements.
#
# Every URL appears exactly once. Companies that fit several categories
# (Iberdrola: renewables and utilities; EDF: utilities and nuclear) are listed
# under the first matching category only, so they are not scraped twice and
# their headlines are not duplicated in the results.
ENERGY_COMPANIES = [
    # Oil and Gas Companies
    "https://corporate.exxonmobil.com/",
    "https://www.chevron.com/",
    "https://www.bp.com/",
    "https://www.shell.com/",
    "https://totalenergies.com/",
    "https://www.aramco.com/",
    "http://www.petrochina.com.cn/ptr/",
    "https://www.gazprom.com/",
    "https://www.lukoil.com/",
    "https://www.rosneft.com/",
    # Renewable Energy Companies
    "https://www.nexteraenergy.com/",
    "https://www.iberdrola.com/",
    "https://www.vestas.com/",
    "https://www.siemensgamesa.com/",
    "https://orsted.com/",
    "https://www.enelgreenpower.com/",
    "https://www.firstsolar.com/",
    "https://bep.brookfield.com/",
    "https://www.canadiansolar.com/",
    "https://us.sunpower.com/",
    # Electricity Generation and Utility Companies
    "https://www.duke-energy.com/",
    "https://www.edf.fr/",
    "https://www.eon.com/",
    "https://www.enel.com/",
    "https://www.nationalgrid.com/",
    "https://www.southerncompany.com/",
    "https://www.aep.com/",
    "https://www.engie.com/",
    "https://www.xcelenergy.com/",
    # Nuclear Energy Companies
    "https://www.rosatom.ru/",
    "https://www.exeloncorp.com/",
    "https://www.westinghousenuclear.com/",
    "https://www.orano.group/en/",
]
100
+
101
  def allowed_file(filename):
102
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
103
 
 
168
  Answer the question as detailed as possible from the provided context. If the answer is not directly
169
  available in the provided context, use your knowledge to infer a reasonable answer based on the given information.
170
  If you're unsure or the question is completely unrelated to the context, state that you don't have enough information to answer accurately.
 
171
  Context:\n{context}\n
172
  Question:\n{question}\n
173
  Answer:
 
214
 
215
  prompt = f'''
216
  {system_prompt}
 
217
  Query: "{query}"
 
218
  Requirements:
219
  - Use a friendly yet professional tone.
220
  - Ensure the response is accurate and directly addresses the question.
 
231
 
232
  return generated_text
233
 
234
def scrape_company_news(url):
    """Scrape headline links from a company web page.

    Looks for ``<article>`` elements, falling back to ``div.news-item`` and
    then ``div.press-release``, and inspects at most the first five containers.

    Args:
        url: Address of the page to fetch; also used as the base for
            resolving relative links.

    Returns:
        A list of dicts with keys ``'title'``, ``'url'`` (absolute) and
        ``'source'`` (the host part of *url*). Returns an empty list when the
        request fails, the server answers with an HTTP error status, or
        nothing matching is found. Errors are logged, never raised.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # This is a basic scraper. You'll need to adjust the selectors for each website
        articles = (
            soup.find_all('article')
            or soup.find_all('div', class_='news-item')
            or soup.find_all('div', class_='press-release')
        )

        source = urlparse(url).netloc
        news_items = []
        for article in articles[:5]:  # Limit to top 5 news items per company
            title = article.find('h2') or article.find('h3') or article.find('a')
            # Only anchors that carry an href are usable; indexing a bare
            # <a> would raise KeyError and discard the whole page's results.
            link = article.find('a', href=True)
            if not title or not link:
                continue
            title_text = title.text.strip()
            if not title_text:
                continue  # e.g. image-only links have no usable headline
            news_items.append({
                'title': title_text,
                'url': urljoin(url, link['href']),
                'source': source
            })

        return news_items
    except Exception as e:
        # Best-effort scraping: one broken site must not fail the whole run.
        logging.error(f"Error scraping {url}: {str(e)}")
        return []
258
+
259
def get_company_news():
    """Scrape every site in ENERGY_COMPANIES concurrently and merge the results.

    Returns:
        A flat list of the news-item dicts produced by ``scrape_company_news``,
        in completion order (not in the order of ENERGY_COMPANIES).
    """
    # Drop repeated URLs while keeping the listing order, so no site is
    # fetched twice and no headline is reported twice.
    urls = list(dict.fromkeys(ENERGY_COMPANIES))

    all_company_news = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_url = {executor.submit(scrape_company_news, url): url for url in urls}
        # No sleep between results: all requests are already submitted by the
        # time a future completes, so a delay here would not throttle any
        # server and would only slow down collecting the answers.
        for future in as_completed(future_to_url):
            try:
                all_company_news.extend(future.result())
            except Exception as e:
                # A failure in one scrape must not lose the other sites' news.
                logging.error(f"Error collecting news from {future_to_url[future]}: {e}")
    return all_company_news
267
+
268
  def get_energy_news(query):
269
  try:
270
+ articles = newsapi.get_everything(q=query, language='en', sort_by='publishedAt', page_size=20)
271
  return articles['articles']
272
  except Exception as e:
273
  logging.error(f"Error fetching news: {e}")
274
  return []
275
 
276
def analyze_news_item(item, query, is_company_news=False):
    """Ask the model whether a news item is relevant to *query* and analyze it.

    Args:
        item: Dict describing the news item. Reads the keys ``'source'``,
            ``'title'``, ``'description'``, ``'content'`` and ``'url'``; all
            are optional.
        query: Topic the item is checked against.
        is_company_news: Flag copied into the result to mark items that came
            from a company website rather than the news API.

    Returns:
        A dict with keys ``'title'``, ``'link'``, ``'source'``, ``'analysis'``
        and ``'is_company_news'`` when the model's answer begins with YES;
        otherwise ``None``. ``None`` is also returned (and the error logged)
        when the model call fails.
    """
    # Local import keeps this block self-contained.
    import re

    # The source may be a nested dict with a 'name' entry or a plain string.
    # Decide by shape so that a missing/None source or a mislabelled item
    # cannot raise before the guarded section below.
    raw_source = item.get('source')
    source = raw_source.get('name') if isinstance(raw_source, dict) else raw_source

    title = item.get('title', 'No title')
    content = item.get('description', '') or item.get('content', '') or ''
    url = item.get('url', '#')

    prompt = f"""
    Analyze the following news item in the context of the energy market:
    Query: {query}
    Source: {source}
    Title: {title}
    Content: {content}
    URL: {url}

    Is this news item directly relevant to "{query}" in the context of the energy market?
    Answer ONLY 'YES' or 'NO', followed by a brief explanation.
    If YES, provide:
    1. A concise 2-3 sentence summary of the news.
    2. Key points (up to 3 bullet points).
    3. Specific impact on the energy market related to {query} (1-2 sentences).
    """

    try:
        response = model.generate_content(prompt)
        analysis = response.text.strip()

        # Accept the verdict regardless of case or surrounding emphasis/quote
        # marks ("Yes", "**YES**"), but require a whole word so that e.g.
        # "Yesterday ..." is not mistaken for a YES. One separator character
        # right after the verdict ("YES." / "YES -") is dropped with it.
        verdict = re.match(r"^[\s*_\"'`]*yes\b[*_\"'`]*[ \t]*[.,:;!\-]?\s*", analysis, re.IGNORECASE)
        if verdict is None:
            return None

        return {
            'title': title,
            'link': url,
            'source': source,
            'analysis': analysis[verdict.end():].strip(),
            'is_company_news': is_company_news
        }
    except Exception as e:
        logging.error(f"Error analyzing news item: {e}")
        return None
314
 
315
def filter_and_analyze_news(query, articles, company_news):
    """Analyze news items in parallel and keep those judged relevant to *query*.

    Args:
        query: Topic each item is checked against.
        articles: Items from the news API (may be empty or None).
        company_news: Items scraped from company websites (may be empty or None).

    Returns:
        A list of the non-empty results of ``analyze_news_item``, in
        completion order. Collection stops once 20 results have been gathered.
    """
    # Tag each item by the list it came from. Both kinds of item are dicts,
    # so a type check cannot tell them apart.
    tagged_items = [(item, False) for item in (articles or [])]
    tagged_items += [(item, True) for item in (company_news or [])]

    filtered_and_analyzed_news = []

    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = [
            executor.submit(analyze_news_item, item, query, from_company)
            for item, from_company in tagged_items
        ]
        for future in as_completed(futures):
            try:
                result = future.result()
            except Exception as e:
                # One failed analysis must not discard the others.
                logging.error(f"Error analyzing news item: {e}")
                continue

            if result:
                filtered_and_analyzed_news.append(result)

            if len(filtered_and_analyzed_news) >= 20:
                # Leaving the loop alone would still run every queued call
                # when the executor shuts down; cancel what has not started.
                for pending in futures:
                    pending.cancel()
                break

    return filtered_and_analyzed_news
330
 
 
332
  if not filtered_news:
333
  return f"No relevant news found for '{query}' in the energy market context."
334
 
335
+ general_news = [item for item in filtered_news if not item.get('is_company_news')]
336
+ company_news = [item for item in filtered_news if item.get('is_company_news')]
337
+
338
  summaries = [item.get('analysis', '') for item in filtered_news]
339
  combined_summary = "\n\n".join(summaries)
340
 
341
  prompt = f"""
342
+ Based on the following summaries of recent news articles and company announcements related to '{query}' in the energy market:
 
343
  {combined_summary}
344
+ Provide a comprehensive market summary that:
 
345
  1. Highlights the current trends and developments related to {query} in the energy market.
346
  2. Identifies any significant impacts or potential changes in the market.
347
  3. Mentions any notable events or decisions affecting this area.
348
+ 4. Compares and contrasts information from general news sources and energy company announcements.
349
+ 5. Identifies any discrepancies or complementary information between general news and company-specific news.
350
+ Keep the summary focused on factual information derived from the news articles and company announcements, without adding speculation or personal opinions.
351
+ Organize the summary into clear sections with appropriate subheadings.
352
  """
353
 
354
  try:
 
369
  role = data.get('role')
370
  file_id = data.get('file_id')
371
  news_context = data.get('newsContext')
 
 
372
 
373
+ try:
374
+ if news_context:
375
+ # Process query with news context
376
  prompt = f"""
377
+ You are an AI News Analyst specializing in the energy market. Use the following news context and your general knowledge to answer the query.
378
 
379
  News Context:
380
+ Market Summary: {news_context.get('market_summary', 'No market summary available.')}
381
+
382
+ Top Articles:
383
+ {' '.join([f"- {article['title']}: {article['summary']}" for article in news_context.get('top_articles', [])])}
384
+
385
+ Query: {query}
386
 
387
+ Provide a comprehensive answer that:
388
+ 1. Directly addresses the query using information from the news context.
389
+ 2. Incorporates relevant general knowledge about the energy market.
390
+ 3. Highlights any connections or insights between the query and the recent news.
391
+ 4. Offers a balanced perspective, considering both general news and company-specific announcements.
392
+ 5. Suggests potential implications or future trends based on the available information.
393
 
394
+ Format your response with clear headings and bullet points where appropriate.
395
  """
396
  response = model.generate_content(prompt)
397
  return jsonify({'response': response.text})
 
501
  query = data.get('query')
502
  try:
503
  all_articles = get_energy_news(query)
504
+ company_news = get_company_news()
505
+ filtered_news = filter_and_analyze_news(query, all_articles, company_news)
506
  market_summary = generate_market_summary(query, filtered_news)
507
 
508
  # Prepare the top 10 articles with summaries
 
512
  top_articles.append({
513
  'title': article.get('title', 'No title'),
514
  'url': article.get('link', '#'),
515
+ 'source': article.get('source', 'Unknown'),
516
+ 'summary': summary,
517
+ 'is_company_news': article.get('is_company_news', False)
518
  })
519
 
520
  return jsonify({