gauthamnairy commited on
Commit
1946229
·
verified ·
1 Parent(s): fd6cc4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +377 -174
app.py CHANGED
@@ -28,13 +28,40 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
28
  from urllib.parse import urlparse, urljoin
29
  import time
30
  import random
 
 
 
 
 
 
 
 
 
31
 
32
  app = Flask(__name__)
33
- CORS(app)
 
 
 
 
 
 
34
 
35
  # Set up logging
36
  logging.basicConfig(level=logging.INFO)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Set your Google API key securely using an environment variable
39
  google_api_key = os.getenv('GOOGLE_API_KEY')
40
  genai.configure(api_key=google_api_key)
@@ -99,6 +126,9 @@ ENERGY_COMPANIES = [
99
  "https://www.orano.group/en/"
100
  ]
101
 
 
 
 
102
  def allowed_file(filename):
103
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
104
 
@@ -207,237 +237,345 @@ def analyze_document(text):
207
  return response.text
208
 
209
  def process_query(query, role=None, file_id=None):
210
- logging.info(f"Processing query: {query}, role: {role}, file_id: {file_id}")
211
  if file_id:
212
  return answer_query_from_document(query, file_id)
213
  else:
214
  system_prompt = f"You are an AI assistant specializing in {role}." if role else "You are a helpful AI assistant."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
- prompt = f'''
217
- {system_prompt}
218
- Query: "{query}"
219
- Requirements:
220
- - Use a friendly yet professional tone.
221
- - Ensure the response is accurate and directly addresses the question.
222
- - Include relevant examples, definitions, or comparisons to enhance clarity.
223
- - Format the response in well-structured paragraphs or bullet points with bold headings when appropriate.
224
- - Use markdown formatting for code snippets, emphasis, and structure.
225
- - Aim for a comprehensive response that fully addresses the query.
226
- '''
227
-
228
- logging.info("Generating content...")
229
- response = model.generate_content(prompt)
230
- generated_text = response.text
231
- logging.info("Content generated successfully.")
232
-
233
- return generated_text
 
 
 
 
 
 
 
 
 
 
234
 
235
  def scrape_company_news(url):
 
 
 
 
236
  try:
237
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
238
- response = requests.get(url, headers=headers, timeout=10)
 
 
 
 
 
 
 
 
 
 
239
  soup = BeautifulSoup(response.content, 'html.parser')
240
 
241
- # This is a basic scraper. You'll need to adjust the selectors for each website
242
- articles = soup.find_all('article') or soup.find_all('div', class_='news-item') or soup.find_all('div', class_='press-release')
 
 
243
 
244
  news_items = []
245
- for article in articles[:5]: # Limit to top 5 news items per company
246
- title = article.find('h2') or article.find('h3') or article.find('a')
247
- link = article.find('a')
248
- if title and link:
 
249
  news_items.append({
250
- 'title': title.text.strip(),
251
- 'url': urljoin(url, link['href']),
252
  'source': urlparse(url).netloc
253
  })
254
-
255
  return news_items
256
  except Exception as e:
257
  logging.error(f"Error scraping {url}: {str(e)}")
258
  return []
259
 
260
- def get_company_news():
261
- with ThreadPoolExecutor(max_workers=10) as executor:
262
- future_to_url = {executor.submit(scrape_company_news, url): url for url in ENERGY_COMPANIES}
263
- all_company_news = []
264
- for future in as_completed(future_to_url):
265
- all_company_news.extend(future.result())
266
- time.sleep(random.uniform(0.5, 1.5)) # Random delay to avoid overwhelming servers
267
- return all_company_news
268
-
269
  def get_energy_news(query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  try:
271
- articles = newsapi.get_everything(q=query, language='en', sort_by='publishedAt', page_size=20)
272
- return articles['articles']
 
 
 
 
 
 
 
 
 
 
 
 
273
  except Exception as e:
274
  logging.error(f"Error fetching news: {e}")
275
  return []
276
 
277
- def analyze_news_item(item, query, is_company_news=False):
278
- source = item.get('source', {}).get('name') if not is_company_news else item.get('source')
279
- title = item.get('title', 'No title')
280
- content = item.get('description', '') or item.get('content', '') or ''
281
- url = item.get('url', '#')
282
-
283
- prompt = f"""
284
- Analyze the following news item in the context of the energy market:
285
- Query: {query}
286
- Source: {source}
287
- Title: {title}
288
- Content: {content}
289
- URL: {url}
290
-
291
- Is this news item directly relevant to "{query}" in the context of the energy market?
292
- Answer ONLY 'YES' or 'NO', followed by a brief explanation.
293
- If YES, provide:
294
- 1. A concise 2-3 sentence summary of the news.
295
- 2. Key points (up to 3 bullet points).
296
- 3. Specific impact on the energy market related to {query} (1-2 sentences).
297
  """
298
-
 
 
 
299
  try:
300
- response = model.generate_content(prompt)
301
- analysis = response.text.strip()
302
-
303
- if analysis.startswith("YES"):
304
- return {
305
- 'title': title,
306
- 'link': url,
307
- 'source': source,
308
- 'analysis': analysis.split("YES", 1)[1].strip(),
309
- 'is_company_news': is_company_news
310
- }
311
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  except Exception as e:
313
- logging.error(f"Error analyzing news item: {e}")
314
  return None
315
 
 
 
 
 
 
 
 
 
 
316
  def filter_and_analyze_news(query, articles, company_news):
 
 
 
 
317
  all_news = articles + company_news
318
  filtered_and_analyzed_news = []
319
 
320
  with ThreadPoolExecutor(max_workers=20) as executor:
321
- future_to_item = {executor.submit(analyze_news_item, item, query, isinstance(item, dict)): item for item in all_news}
322
- for future in as_completed(future_to_item): # Changed from future_to_url to future_to_item
 
 
 
323
  result = future.result()
324
  if result:
325
  filtered_and_analyzed_news.append(result)
326
-
327
  if len(filtered_and_analyzed_news) >= 20:
328
  break
329
 
330
  return filtered_and_analyzed_news
331
 
332
  def generate_market_summary(query, filtered_news):
 
 
 
 
333
  if not filtered_news:
334
  return f"No relevant news found for '{query}' in the energy market context."
335
 
336
- general_news = [item for item in filtered_news if not item.get('is_company_news')]
337
- company_news = [item for item in filtered_news if item.get('is_company_news')]
338
-
339
- summaries = [item.get('analysis', '') for item in filtered_news]
340
  combined_summary = "\n\n".join(summaries)
341
 
342
  prompt = f"""
343
- Based on the following summaries of recent news articles and company announcements related to '{query}' in the energy market:
344
  {combined_summary}
 
345
  Provide a comprehensive market summary that:
346
- 1. Highlights the current trends and developments related to {query} in the energy market.
347
- 2. Identifies any significant impacts or potential changes in the market.
348
- 3. Mentions any notable events or decisions affecting this area.
349
- 4. Compares and contrasts information from general news sources and energy company announcements.
350
- 5. Identifies any discrepancies or complementary information between general news and company-specific news.
351
- Keep the summary focused on factual information derived from the news articles and company announcements, without adding speculation or personal opinions.
352
- Organize the summary into clear sections with appropriate subheadings.
353
  """
354
-
355
  try:
356
  response = model.generate_content(prompt)
357
  return response.text.strip()
358
  except Exception as e:
359
- logging.error(f"Error generating market summary: {e}")
360
- return f"Unable to generate market summary for '{query}' due to an error."
361
 
362
  @app.route('/')
363
  def index():
364
  return render_template('index.html')
365
 
 
 
 
 
 
 
 
 
 
 
 
366
  @app.route('/query', methods=['POST'])
 
367
  def query():
368
  data = request.json
 
 
 
369
  query = data.get('query')
370
  role = data.get('role')
371
  file_id = data.get('file_id')
372
- news_context = data.get('newsContext')
 
 
 
 
373
 
374
  try:
375
- if news_context:
376
- # Process query with news context
377
- prompt = f"""
378
- You are an AI News Analyst specializing in the energy market. Use the following news context and your general knowledge to answer the query.
379
-
380
- News Context:
381
- Market Summary: {news_context.get('market_summary', 'No market summary available.')}
382
-
383
- Top Articles:
384
- {' '.join([f"- {article['title']}: {article['summary']}" for article in news_context.get('top_articles', [])])}
385
-
386
- Query: {query}
387
-
388
- Provide a comprehensive answer that:
389
- 1. Directly addresses the query using information from the news context.
390
- 2. Incorporates relevant general knowledge about the energy market.
391
- 3. Highlights any connections or insights between the query and the recent news.
392
- 4. Offers a balanced perspective, considering both general news and company-specific announcements.
393
- 5. Suggests potential implications or future trends based on the available information.
394
-
395
- Format your response with clear headings and bullet points where appropriate.
396
- """
397
- response = model.generate_content(prompt)
398
- return jsonify({'response': response.text})
399
- else:
400
- # Handle regular queries as before
401
- response = process_query(query, role, file_id)
402
- return jsonify({'response': response})
403
  except Exception as e:
404
- logging.error(f"Error in /query route: {str(e)}", exc_info=True)
405
  return jsonify({'error': str(e)}), 500
406
 
407
  @app.route('/upload', methods=['POST'])
 
408
  def upload_file():
409
  if 'file' not in request.files:
410
  return jsonify({'error': 'No file part'}), 400
 
411
  file = request.files['file']
412
  if file.filename == '':
413
  return jsonify({'error': 'No selected file'}), 400
414
- if file and allowed_file(file.filename):
415
- filename = secure_filename(file.filename)
416
-
417
- try:
418
- file_content = file.read()
419
- logging.info(f"File uploaded successfully: {filename}")
420
- extracted_text = process_document(file_content, filename)
421
- text_chunks = get_text_chunks(extracted_text)
422
- analysis = analyze_document(extracted_text)
423
-
424
- file_id = len(files_storage) + 1
425
- files_storage[file_id] = {
426
- 'filename': filename,
427
- 'file_data': base64.b64encode(file_content).decode('utf-8'),
428
- 'analysis': analysis
429
- }
430
-
431
- create_vector_store(text_chunks, file_id)
432
-
433
- logging.info(f"File processing completed and saved to in-memory storage with ID: {file_id}")
434
 
435
- return jsonify({'file_id': file_id, 'analysis': analysis})
436
- except Exception as e:
437
- logging.error(f'Error processing file: {str(e)}', exc_info=True)
438
- return jsonify({'error': str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
 
440
- return jsonify({'error': 'Invalid file type'}), 400
 
 
 
 
 
 
 
441
 
442
  @app.route('/plot', methods=['POST'])
443
  def plot():
@@ -498,36 +636,101 @@ def process_dataframe_query(df, query):
498
 
499
  @app.route('/fetch_news', methods=['POST'])
500
  def fetch_news():
501
- data = request.json
502
- query = data.get('query')
503
  try:
504
- all_articles = get_energy_news(query)
505
- company_news = get_company_news()
506
- filtered_news = filter_and_analyze_news(query, all_articles, company_news)
507
- market_summary = generate_market_summary(query, filtered_news)
508
-
509
- # Prepare the top 10 articles with summaries
510
- top_articles = []
511
- for article in filtered_news[:10]:
512
- summary = article.get('analysis', '').split('\n\n')[0] # Get the first paragraph of the analysis as summary
513
- top_articles.append({
514
- 'title': article.get('title', 'No title'),
515
- 'url': article.get('link', '#'),
516
- 'source': article.get('source', 'Unknown'),
517
- 'summary': summary,
518
- 'is_company_news': article.get('is_company_news', False)
519
  })
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  return jsonify({
522
- 'top_articles': top_articles,
523
- 'market_summary': market_summary,
524
- 'full_analysis': filtered_news
525
  })
 
526
  except Exception as e:
527
- logging.error(f"Error in fetch_news route: {str(e)}", exc_info=True)
528
- return jsonify({'error': str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
530
  if __name__ == '__main__':
531
- # Use the PORT environment variable if it's set, otherwise default to 7860
532
- port = int(os.environ.get('PORT', 7860))
533
- app.run(host='0.0.0.0', port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  from urllib.parse import urlparse, urljoin
29
  import time
30
  import random
31
+ from functools import wraps
32
+ from pathlib import Path
33
+ from dotenv import load_dotenv
34
+ from requests.adapters import HTTPAdapter
35
+ from urllib3.util.retry import Retry
36
+ from transformers import pipeline
37
+ import torch
38
+
39
load_dotenv()  # Load environment variables from .env (python-dotenv's default lookup)

app = Flask(__name__)
# Allow cross-origin GET/POST/OPTIONS requests from any origin on every route.
CORS(app, resources={
    r"/*": {
        "origins": "*",
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type"],
    }
})

# Set up logging
logging.basicConfig(level=logging.INFO)

# Directory containing app.py, so the .env lookup does not depend on the CWD.
BASE_DIR = Path(__file__).resolve().parent

# Load environment variables from the .env file that sits next to app.py.
env_path = BASE_DIR / '.env'
load_dotenv(env_path)

# Startup diagnostics. The key itself is never written out: only whether it is
# set, because stdout/log output is routinely captured and shared.
logging.info("Current working directory: %s", os.getcwd())
logging.info("Env file exists: %s", env_path.exists())
logging.info("GOOGLE_API_KEY configured: %s", bool(os.getenv('GOOGLE_API_KEY')))

# Set your Google API key securely using an environment variable
google_api_key = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=google_api_key)
 
126
  "https://www.orano.group/en/"
127
  ]
128
 
129
# Local fallback summarizer: a Hugging Face "summarization" pipeline using the
# facebook/bart-large-cnn checkpoint. It is constructed once, at import time.
local_summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
131
+
132
def allowed_file(filename):
    """Return True when the text after the last '.' in *filename* is allowed.

    The extension is lower-cased before it is looked up in ALLOWED_EXTENSIONS.
    A name without any '.' is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
134
 
 
237
  return response.text
238
 
239
def process_query(query, role=None, file_id=None):
    """Answer a user query from an uploaded document or directly from the model.

    Args:
        query: The user's question; it is interpolated verbatim into the prompt.
        role: Optional specialty named in the system prompt. When falsy, the
            generic "helpful AI assistant" system prompt is used.
        file_id: Optional id of an uploaded file. When truthy, the query is
            delegated to answer_query_from_document and ``role`` is not used.

    Returns:
        The answer text produced by the delegate or by the model.

    Raises:
        Exception: Anything raised by ``model.generate_content`` is logged with
            its traceback and re-raised unchanged.
    """
    if file_id:
        return answer_query_from_document(query, file_id)

    system_prompt = (
        f"You are an AI assistant specializing in {role}."
        if role
        else "You are a helpful AI assistant."
    )
    prompt = f"""
        {system_prompt}

        Please format your response using markdown with proper structure:
        - Use '##' for main headings
        - Use '**' for bold text
        - Use bullet points ('*') for lists
        - Add proper spacing between sections
        - Structure the content hierarchically
        - Use proper paragraphs with line breaks

        Query: "{query}"

        Remember to:
        - Format the response clearly and professionally
        - Use headings for different sections
        - Break down complex information into digestible parts
        - Use bold text for emphasis on key terms
        - Maintain consistent spacing
        """
    try:
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        logging.error("Error generating content: %s", e, exc_info=True)
        # Bare raise keeps the original traceback intact for the caller.
        raise
270
 
271
def local_summarize(text, max_words=800):
    """Summarize *text* with the module-level ``local_summarizer`` pipeline.

    Text longer than ``max_words`` whitespace-separated words is cut into
    consecutive chunks of at most ``max_words`` words; each chunk is summarized
    on its own and the partial summaries are joined with single spaces.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only input
            yields an empty string without invoking the model.
        max_words: Maximum number of words handed to the model per call.

    Returns:
        The summary string ('' for blank input).

    Raises:
        ValueError: If ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    words = text.split() if text else []
    if not words:
        return ""

    def summarize_passage(passage):
        # truncation=True asks the tokenizer to clip a passage that is still
        # too long for the model instead of failing; a word count is only a
        # rough proxy for the model's token limit.
        outputs = local_summarizer(
            passage,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return outputs[0]['summary_text']

    if len(words) <= max_words:
        return summarize_passage(text)

    chunks = (
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    )
    return " ".join(summarize_passage(chunk) for chunk in chunks)
299
 
300
def scrape_company_news(url, max_items=5):
    """Scrape up to ``max_items`` news links from a company web page.

    Candidate containers are every ``<article>``, then every
    ``<div class="news-item">``, then every ``<div class="press-release">``.
    A candidate is used when it has a title element (first ``h2``, else ``h3``,
    else ``a``) and an ``<a>`` with an ``href``. Links that resolve to a URL
    already collected are skipped, since the selectors can overlap.

    The request goes through a session that retries up to 3 times with
    backoff on 429/500/502/503/504 and uses a 10-second timeout.

    Args:
        url: Page to scrape; also the base for resolving relative links.
        max_items: Maximum number of items to return.

    Returns:
        A list of dicts with 'title', 'url' (absolute) and 'source' (the
        network location of ``url``). Any failure is logged and yields ``[]``.
    """
    try:
        retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
        headers = {
            'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/91.0.4472.124 Safari/537.36')
        }
        # The context manager closes the session's pooled connections.
        with requests.Session() as session:
            adapter = HTTPAdapter(max_retries=retries)
            session.mount('https://', adapter)
            session.mount('http://', adapter)
            response = session.get(url, headers=headers, timeout=10)
        # Do not parse 4xx/5xx error pages as if they were news listings.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Combine results from multiple selectors
        candidates = soup.find_all('article')
        candidates.extend(soup.find_all('div', class_='news-item'))
        candidates.extend(soup.find_all('div', class_='press-release'))

        source = urlparse(url).netloc
        news_items = []
        seen_urls = set()
        for candidate in candidates:
            if len(news_items) >= max_items:
                break
            title_elem = candidate.find('h2') or candidate.find('h3') or candidate.find('a')
            link_elem = candidate.find('a')
            if not (title_elem and link_elem and link_elem.has_attr('href')):
                continue
            item_url = urljoin(url, link_elem['href'])
            if item_url in seen_urls:
                continue
            seen_urls.add(item_url)
            news_items.append({
                'title': title_elem.get_text(strip=True),
                'url': item_url,
                'source': source
            })
        return news_items
    except Exception as e:
        # Best-effort scraper: one bad site must not break the aggregate.
        logging.error(f"Error scraping {url}: {str(e)}")
        return []
340
 
 
 
 
 
 
 
 
 
 
341
def get_energy_news(query):
    """Fetch news articles matching *query* from the NewsData.io API.

    The request is restricted to US, English-language, business-category
    results and uses a 10-second timeout. The API key is read from the
    ``NEWSDATA_API_KEY`` environment variable.

    Args:
        query: Search text sent as the ``q`` parameter.

    Returns:
        The list under the response's "results" key when the response's
        "status" is "success". Returns ``[]`` when the key is missing, the
        request fails, or the API reports a non-success status.
    """
    logging.info(f"Starting news fetch for query: {query}")

    news_data_api_key = os.getenv('NEWSDATA_API_KEY')
    if not news_data_api_key:
        logging.error("NewsData API key not found in environment variables")
        return []

    endpoint = "https://newsdata.io/api/1/news"

    params = {
        'apikey': news_data_api_key,
        'q': query,
        'country': 'us',
        'language': 'en',
        'category': 'business'
    }

    # Never write the credential to the log; log the other parameters only.
    loggable_params = {key: value for key, value in params.items() if key != 'apikey'}
    logging.info(f"Making API request to: {endpoint}")
    logging.info(f"With parameters: {loggable_params}")

    try:
        response = requests.get(endpoint, params=params, timeout=10)
        logging.info(f"API Response status code: {response.status_code}")

        response.raise_for_status()
        data = response.json()

        if data.get("status") != "success":
            logging.error(f"NewsData API error response: {data}")
            return []

        articles = data.get("results", [])
        logging.info(f"Successfully fetched {len(articles)} articles")
        return articles

    except Exception as e:
        logging.error(f"Error fetching news: {e}")
        return []
383
 
384
def robust_analyze_news_item(item, query):
    """Analyze one news item with the generative model, with a local fallback.

    Args:
        item: Dict describing the article. Reads 'title', then 'description'
            or 'content' for the body, 'source_id' (falling back to 'source')
            and 'link' (falling back to 'url', the key used by
            scrape_company_news). Keys that are present but null are treated
            as missing.
        query: Accepted for call-site compatibility; the prompt does not use it.

    Returns:
        A dict with 'title', 'link', 'source' and 'analysis'. Returns ``None``
        when the body has fewer than 10 non-blank characters or when analysis
        fails; failures are logged, never raised.
    """
    # `or ''` also covers keys that exist with a null value, which a plain
    # .get(key, default) would return as None.
    title = item.get('title') or ''
    logging.info(f"Starting analysis for article: {title or 'No title'}")

    try:
        # Extract article information
        content = item.get('description') or item.get('content') or ''
        source = item.get('source_id') or item.get('source') or 'Unknown Source'
        url = item.get('link') or item.get('url') or '#'

        logging.info(f"Article details - Title: {title[:100]}...")
        logging.info(f"Content length: {len(content)} characters")

        # Skip if no meaningful content
        if len(content.strip()) < 10:
            logging.warning(f"Skipping article due to insufficient content: {title}")
            return None

        prompt = f"""
            Analyze this news article:
            Title: {title}
            Content: {content}

            Provide a brief analysis in the following format:
            1. Summary (2-3 sentences)
            2. Key Points (up to 3 bullet points)
            3. Market Impact (1-2 sentences about potential market implications)
            """

        logging.info("Attempting analysis with generative model")
        try:
            response = model.generate_content(prompt)
            analysis = response.text.strip()
            logging.info("Successfully generated analysis with model")
        except Exception as e:
            # The hosted model is the preferred path; the local summarizer is
            # used only when that call fails.
            logging.warning(f"Generative model failed: {str(e)}")
            logging.info("Falling back to local summarizer")
            analysis = local_summarize(content)

        result = {
            'title': title,
            'link': url,
            'source': source,
            'analysis': analysis
        }
        logging.info("Successfully created analysis result")
        return result

    except Exception as e:
        logging.error(f"Error in robust_analyze_news_item: {str(e)}")
        return None
438
 
439
def get_company_news():
    """Scrape every URL in ENERGY_COMPANIES concurrently and merge the results.

    Each URL is handed to scrape_company_news on a pool of up to 10 worker
    threads. Results are appended in completion order, not list order.

    Returns:
        A flat list of all news-item dicts returned by the scrapers.
    """
    all_company_news = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [executor.submit(scrape_company_news, url) for url in ENERGY_COMPANIES]
        # No delay while collecting: every request is already submitted by the
        # time results arrive, so sleeping here throttles nothing and only
        # postpones the return.
        for future in as_completed(futures):
            all_company_news.extend(future.result())
    return all_company_news
447
+
448
def filter_and_analyze_news(query, articles, company_news, max_items=20):
    """Analyze API articles and scraped company news concurrently.

    Every item is passed to robust_analyze_news_item on a pool of up to 20
    worker threads; items for which it returns a falsy value are dropped.

    Args:
        query: Forwarded to robust_analyze_news_item.
        articles: Items returned by the news API.
        company_news: Items returned by the company scrapers.
        max_items: Stop collecting once this many analyses are available.

    Returns:
        A list of analysis dicts in completion order (not input order).
    """
    all_news = list(articles) + list(company_news)
    filtered_and_analyzed_news = []

    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = [
            executor.submit(robust_analyze_news_item, item, query)
            for item in all_news
        ]
        for future in as_completed(futures):
            result = future.result()
            if result:
                filtered_and_analyzed_news.append(result)

            if len(filtered_and_analyzed_news) >= max_items:
                # Leaving the loop alone would still run every queued task when
                # the executor shuts down; cancel whatever has not started.
                for pending in futures:
                    pending.cancel()
                break

    return filtered_and_analyzed_news
469
 
470
def generate_market_summary(query, filtered_news):
    """Build one market summary out of the per-article analyses.

    With no analyses, a fixed "no relevant news" sentence naming *query* is
    returned. Otherwise each item's title and analysis are concatenated and
    sent to the generative model; if that call raises, the concatenated text
    is passed to local_summarize instead.
    """
    if not filtered_news:
        return f"No relevant news found for '{query}' in the energy market context."

    # One "Title/Analysis" paragraph per item, separated by blank lines.
    combined_summary = "\n\n".join(
        f"Title: {entry.get('title', 'No title')}\nAnalysis: {entry.get('analysis', '')}\n"
        for entry in filtered_news
    )

    prompt = f"""
        Based on the following news analyses:
        {combined_summary}

        Provide a comprehensive market summary that:
        - Highlights current trends related to {query} in the energy market.
        - Identifies key insights and potential market impacts.
        - Organizes the information into clearly defined sections.
        """

    try:
        generated = model.generate_content(prompt)
        return generated.text.strip()
    except Exception as e:
        logging.error(f"Error generating market summary using API: {e}. Falling back to local summarization.")
        return local_summarize(combined_summary)
499
 
500
@app.route('/')
def index():
    """Render the 'index.html' template for the root URL."""
    page = render_template('index.html')
    return page
503
 
504
+ # Add error handling decorator
505
def handle_errors(f):
    """Decorator that turns unexpected exceptions in a view into a JSON 500.

    Werkzeug ``HTTPException`` instances (raised by ``abort()`` and by request
    parsing, e.g. a malformed JSON body) are re-raised untouched so Flask can
    answer with their own 4xx/5xx status. Any other exception is logged with
    its traceback and reported as ``{'error': <message>}`` with status 500.

    Args:
        f: The view function to wrap.

    Returns:
        The wrapped view; ``functools.wraps`` keeps the original name so Flask
        endpoint names are unaffected.
    """
    # Local import keeps this block self-contained; Werkzeug ships with Flask.
    from werkzeug.exceptions import HTTPException

    @wraps(f)
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except HTTPException:
            # Client errors must not be masked as server errors.
            raise
        except Exception as e:
            logging.error(f"Error in {f.__name__}: {str(e)}", exc_info=True)
            return jsonify({'error': str(e)}), 500
    return wrapper
514
+
515
@app.route('/query', methods=['POST'])
@handle_errors
def query():
    """Answer a question posted as JSON.

    Expects a JSON object with 'query' and 'role', plus an optional 'file_id'.

    Returns:
        ``{'response': <text>}`` on success; ``{'error': <message>}`` with
        status 400 when the body is missing, is not a JSON object, or lacks
        'query' or 'role'; ``{'error': <message>}`` with status 500 when
        process_query raises.
    """
    # silent=True returns None for a missing or malformed body instead of
    # raising, so bad input is reported as a 400 rather than a 500.
    data = request.get_json(silent=True)
    if not data or not isinstance(data, dict):
        return jsonify({'error': 'No data provided'}), 400

    # Named user_query so the local does not shadow this view function.
    user_query = data.get('query')
    role = data.get('role')
    file_id = data.get('file_id')

    if not user_query:
        return jsonify({'error': 'No query provided'}), 400
    if not role:
        return jsonify({'error': 'No role provided'}), 400

    try:
        response = process_query(user_query, role, file_id)
        return jsonify({'response': response})
    except Exception as e:
        logging.error(f"Error processing query: {str(e)}", exc_info=True)
        return jsonify({'error': str(e)}), 500
537
 
538
@app.route('/upload', methods=['POST'])
@handle_errors
def upload_file():
    """Accept an uploaded document, analyze it, and index it for later queries.

    The multipart form must carry the upload under the 'file' field. The file
    is read into memory, run through process_document, get_text_chunks and
    analyze_document, stored base64-encoded in ``files_storage`` under a new
    id, and indexed with create_vector_store.

    Returns:
        ``{'file_id', 'analysis', 'message'}`` on success; ``{'error': ...}``
        with status 400 for a missing part, empty filename or disallowed
        extension; ``{'error': ...}`` with status 500 when processing fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not allowed_file(file.filename):
        return jsonify({'error': 'Invalid file type'}), 400

    file_id = None
    try:
        file_content = file.read()
        filename = secure_filename(file.filename)

        # Process the file
        extracted_text = process_document(file_content, filename)
        text_chunks = get_text_chunks(extracted_text)
        analysis = analyze_document(extracted_text)

        # Generate file ID and store
        file_id = len(files_storage) + 1
        files_storage[file_id] = {
            'filename': filename,
            'file_data': base64.b64encode(file_content).decode('utf-8'),
            'analysis': analysis
        }

        # Create vector store
        create_vector_store(text_chunks, file_id)
    except Exception as e:
        # Roll back a stored entry whose vector store could not be created, so
        # no orphan record is left behind for an id the client never received.
        if file_id is not None:
            files_storage.pop(file_id, None)
        logging.error(f"Error processing file: {str(e)}", exc_info=True)
        return jsonify({'error': str(e)}), 500

    return jsonify({
        'file_id': file_id,
        'analysis': analysis,
        'message': 'File processed successfully'
    })
579
 
580
  @app.route('/plot', methods=['POST'])
581
  def plot():
 
636
 
637
@app.route('/fetch_news', methods=['POST'])
def fetch_news():
    """Fetch, analyze and summarize news for the posted query.

    Expects a JSON object with a 'query' string. Articles come from
    get_energy_news, each is analyzed with robust_analyze_news_item, and the
    first five analyses are sent to the generative model for a market summary.

    Returns:
        JSON with 'status', 'articles' and 'summary' (plus 'message' on
        errors). Status 400 for a missing query, 500 for unexpected failures.
        "No articles" outcomes keep status 200 with ``'status': 'error'``.
    """
    try:
        # silent=True: a missing or malformed body becomes None, not an exception.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        query = data.get('query') or ''

        if not str(query).strip():
            return jsonify({
                'status': 'error',
                'message': 'No query provided',
                'articles': [],
                'summary': "No query was provided for the news request."
            }), 400

        # Fetch articles from NewsData.io
        articles = get_energy_news(query)

        if not articles:
            return jsonify({
                'status': 'error',
                'message': 'No articles found',
                'articles': [],
                'summary': f"No relevant news found for '{query}'"
            })

        # Analyze articles, dropping those that could not be analyzed
        analyzed_articles = []
        for article in articles:
            analysis = robust_analyze_news_item(article, query)
            if analysis:
                analyzed_articles.append(analysis)

        if not analyzed_articles:
            return jsonify({
                'status': 'error',
                'message': 'No articles could be analyzed',
                'articles': [],
                'summary': f"Could not analyze any articles for '{query}'"
            })

        # Built outside the prompt f-string: before Python 3.12 a backslash
        # inside an f-string replacement field is a SyntaxError.
        article_digest = ' '.join(
            f"Article: {a['title']}\nAnalysis: {a['analysis']}\n\n"
            for a in analyzed_articles[:5]
        )

        # Generate market summary
        summary_prompt = f"""
            Based on the following analyzed news articles about "{query}":

            {article_digest}

            Provide a comprehensive market summary that:
            1. Highlights the main trends and developments
            2. Identifies potential market impacts
            3. Suggests key takeaways for stakeholders
            """

        try:
            summary_response = model.generate_content(summary_prompt)
            market_summary = summary_response.text.strip()
        except Exception as e:
            logging.error(f"Error generating market summary: {str(e)}")
            market_summary = "Unable to generate market summary due to an error."

        return jsonify({
            'status': 'success',
            'articles': analyzed_articles,
            'summary': market_summary
        })

    except Exception as e:
        logging.error(f"Error in fetch_news route: {str(e)}", exc_info=True)
        return jsonify({
            'status': 'error',
            'message': str(e),
            'articles': [],
            'summary': "An error occurred while processing the news request."
        }), 500
702
+
703
+ # Add health check endpoint
704
@app.route('/health', methods=['GET'])
def health_check():
    """Report a fixed 'healthy' status and whether google_api_key is set."""
    payload = {
        'status': 'healthy',
        'api_key_configured': bool(google_api_key),
    }
    return jsonify(payload)
707
+
708
+ # Ensure all required environment variables are set
709
def check_environment():
    """Verify that every required environment variable has a non-empty value.

    Raises:
        EnvironmentError: Naming each required variable that is unset or empty.
    """
    required_vars = ('GOOGLE_API_KEY',)
    missing = ', '.join(name for name in required_vars if not os.getenv(name))
    if missing:
        raise EnvironmentError(f"Missing required environment variables: {missing}")
714
 
715
if __name__ == '__main__':
    # Script entry point: validate configuration, then start the Flask server
    # on all interfaces at $PORT (default 7860).
    try:
        # Check environment variables
        check_environment()

        # Configure logging. force=True (Python 3.8+) replaces handlers that an
        # earlier basicConfig call installed; without it this call is a no-op
        # and the format below is never applied.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            force=True
        )

        # Initialize Google AI
        if not google_api_key:
            raise ValueError("GOOGLE_API_KEY not configured")
        genai.configure(api_key=google_api_key)

        # Start server. Debug mode is opt-in through FLASK_DEBUG: the Werkzeug
        # debugger allows arbitrary code execution and must never be reachable
        # on a server bound to 0.0.0.0 by default.
        port = int(os.environ.get('PORT', 7860))
        debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
        app.run(host='0.0.0.0', port=port, debug=debug_enabled)
    except Exception as e:
        logging.error(f"Failed to start server: {str(e)}", exc_info=True)
        raise