Spaces:

gauthamnairy
/

Finder

Sleeping

App Files Files Community

gauthamnairy committed on Aug 29, 2024

Commit

a0f5062

verified ·

1 Parent(s): da62bc1

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -63

app.py CHANGED Viewed

@@ -23,6 +23,11 @@ import plotly
 from newsapi import NewsApiClient
 import certifi
 import requests
 app = Flask(__name__)
 CORS(app)
@@ -50,6 +55,49 @@ ALLOWED_EXTENSIONS = {'txt', 'pdf', 'docx', 'xlsx', 'csv'}
 files_storage = {}
 chunks_storage = []
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@@ -120,7 +168,6 @@ def get_conversational_chain():
     Answer the question as detailed as possible from the provided context. If the answer is not directly
     available in the provided context, use your knowledge to infer a reasonable answer based on the given information.
     If you're unsure or the question is completely unrelated to the context, state that you don't have enough information to answer accurately.
     Context:\n{context}\n
     Question:\n{question}\n
     Answer:
@@ -167,9 +214,7 @@ def process_query(query, role=None, file_id=None):
         prompt = f'''
         {system_prompt}
         Query: "{query}"
         Requirements:
         - Use a friendly yet professional tone.
         - Ensure the response is accurate and directly addresses the question.
@@ -186,68 +231,100 @@ def process_query(query, role=None, file_id=None):
         return generated_text
 def get_energy_news(query):
     try:
-        articles = newsapi.get_everything(q=query, language='en', sort_by='publishedAt', page_size=10)
         return articles['articles']
     except Exception as e:
         logging.error(f"Error fetching news: {e}")
         return []
-def summarize_article(article):
-    title = article.get('title', 'No title')
-    content = article.get('description', '') or article.get('content', '') or ''
-    prompt = f"""
-    Summarize the following news article in 3-4 lines:
     Title: {title}
     Content: {content}
     """
     try:
         response = model.generate_content(prompt)
-        return response.text.strip()
     except Exception as e:
-        logging.error(f"Error summarizing article: {e}")
-        return "Unable to generate summary."
-def filter_and_analyze_news(query, articles):
     filtered_and_analyzed_news = []
-    for article in articles:
-        title = article.get('title', 'No title')
-        content = article.get('description', '') or article.get('content', '') or ''
-        prompt = f"""
-        Analyze the following news article in the context of the energy market:
-        Query: {query}
-        Title: {title}
-        Content: {content}
-        Is this article directly relevant to "{query}" in the context of the energy market?
-        Answer ONLY 'YES' or 'NO', followed by a brief explanation.
-        If YES, provide:
-        1. A concise 2-3 sentence summary of the news.
-        2. Key points (up to 3 bullet points).
-        3. Specific impact on the energy market related to {query} (1-2 sentences).
-        """
-        try:
-            response = model.generate_content(prompt)
-            analysis = response.text.strip()
-            if analysis.startswith("YES"):
-                filtered_and_analyzed_news.append({
-                    'title': title,
-                    'link': article.get('url', '#'),
-                    'analysis': analysis.split("YES", 1)[1].strip()
-                })
-            if len(filtered_and_analyzed_news) >= 10:
                 break
-        except Exception as e:
-            logging.error(f"Error analyzing article: {e}")
     return filtered_and_analyzed_news
@@ -255,20 +332,23 @@ def generate_market_summary(query, filtered_news):
     if not filtered_news:
         return f"No relevant news found for '{query}' in the energy market context."
     summaries = [item.get('analysis', '') for item in filtered_news]
     combined_summary = "\n\n".join(summaries)
     prompt = f"""
-    Based on the following summaries of recent news articles related to '{query}' in the energy market:
     {combined_summary}
-    Provide a concise market summary that:
     1. Highlights the current trends and developments related to {query} in the energy market.
     2. Identifies any significant impacts or potential changes in the market.
     3. Mentions any notable events or decisions affecting this area.
-    Keep the summary focused on factual information derived from the news articles, without adding speculation or personal opinions.
     """
     try:
@@ -289,20 +369,29 @@ def query():
     role = data.get('role')
     file_id = data.get('file_id')
     news_context = data.get('newsContext')
-    try:
-        logging.info(f"Received query: {query}, role: {role}, file_id: {file_id}")
-        if role == 'AI News Analyst' and news_context:
-            # Handle news-related queries with context
             prompt = f"""
-            As an AI News Analyst specializing in the energy market, answer the following question based on the provided news context:
             News Context:
-            {json.dumps(news_context, indent=2)}
-            Question: {query}
-            Provide a concise and informative response, using the provided news context to support your answer.
             """
             response = model.generate_content(prompt)
             return jsonify({'response': response.text})
@@ -412,7 +501,8 @@ def fetch_news():
     query = data.get('query')
     try:
         all_articles = get_energy_news(query)
-        filtered_news = filter_and_analyze_news(query, all_articles)
         market_summary = generate_market_summary(query, filtered_news)
         # Prepare the top 10 articles with summaries
@@ -422,7 +512,9 @@ def fetch_news():
             top_articles.append({
                 'title': article.get('title', 'No title'),
                 'url': article.get('link', '#'),
-                'summary': summary
             })
         return jsonify({

 from newsapi import NewsApiClient
 import certifi
 import requests
+from bs4 import BeautifulSoup
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urlparse, urljoin
+import time
+import random
 app = Flask(__name__)
 CORS(app)
 files_storage = {}
 chunks_storage = []
# Energy company websites scraped for news (iterated by get_company_news).
# Every site appears exactly once so that no site is fetched twice per run;
# companies that belong to several categories are listed under the first one.
ENERGY_COMPANIES = [
    # Oil and Gas Companies
    "https://corporate.exxonmobil.com/",
    "https://www.chevron.com/",
    "https://www.bp.com/",
    "https://www.shell.com/",
    "https://totalenergies.com/",
    "https://www.aramco.com/",
    "http://www.petrochina.com.cn/ptr/",
    "https://www.gazprom.com/",
    "https://www.lukoil.com/",
    "https://www.rosneft.com/",
    # Renewable Energy Companies
    "https://www.nexteraenergy.com/",
    "https://www.iberdrola.com/",
    "https://www.vestas.com/",
    "https://www.siemensgamesa.com/",
    "https://orsted.com/",
    "https://www.enelgreenpower.com/",
    "https://www.firstsolar.com/",
    "https://bep.brookfield.com/",
    "https://www.canadiansolar.com/",
    "https://us.sunpower.com/",
    # Electricity Generation and Utility Companies
    # (Iberdrola also belongs here; it is listed under Renewable Energy above.)
    "https://www.duke-energy.com/",
    "https://www.edf.fr/",
    "https://www.eon.com/",
    "https://www.enel.com/",
    "https://www.nationalgrid.com/",
    "https://www.southerncompany.com/",
    "https://www.aep.com/",
    "https://www.engie.com/",
    "https://www.xcelenergy.com/",
    # Nuclear Energy Companies
    # (EDF also belongs here; it is listed under Utility Companies above.)
    "https://www.rosatom.ru/",
    "https://www.exeloncorp.com/",
    "https://www.westinghousenuclear.com/",
    "https://www.orano.group/en/",
]
 def allowed_file(filename):
     """Return True when ``filename`` has an extension listed in ALLOWED_EXTENSIONS.

     The extension is the text after the last dot, compared case-insensitively.
     A name without any dot is rejected.
     """
     if '.' not in filename:
         return False
     extension = filename.rsplit('.', 1)[1]
     return extension.lower() in ALLOWED_EXTENSIONS
     Answer the question as detailed as possible from the provided context. If the answer is not directly
     available in the provided context, use your knowledge to infer a reasonable answer based on the given information.
     If you're unsure or the question is completely unrelated to the context, state that you don't have enough information to answer accurately.
     Context:\n{context}\n
     Question:\n{question}\n
     Answer:
         prompt = f'''
         {system_prompt}
         Query: "{query}"
         Requirements:
         - Use a friendly yet professional tone.
         - Ensure the response is accurate and directly addresses the question.
         return generated_text
def scrape_company_news(url):
    """Scrape up to five headline links from one company web page.

    The page is fetched with a browser-like User-Agent and a 10 second
    timeout, then searched for ``<article>`` elements, falling back to
    ``div.news-item`` and ``div.press-release``. This is a generic
    heuristic; individual sites may need their own selectors.

    Args:
        url: Absolute URL of the page to scrape.

    Returns:
        A list of dicts with keys ``title`` (headline text), ``url``
        (absolute link resolved against ``url``) and ``source`` (host name
        of ``url``). Returns an empty list on any failure; the error is
        logged rather than raised.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        # Fail fast on 4xx/5xx so an error page is never parsed as a news listing.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # This is a basic scraper. You'll need to adjust the selectors for each website
        articles = (
            soup.find_all('article')
            or soup.find_all('div', class_='news-item')
            or soup.find_all('div', class_='press-release')
        )
        source = urlparse(url).netloc
        news_items = []
        for article in articles[:5]:  # Limit to top 5 news items per company
            title = article.find('h2') or article.find('h3') or article.find('a')
            link = article.find('a')
            if not (title and link):
                continue
            # An <a> without href used to raise KeyError and, through the broad
            # except below, discard every item already collected for this site.
            href = link.get('href')
            if not href:
                continue
            news_items.append({
                'title': title.text.strip(),
                'url': urljoin(url, href),
                'source': source,
            })
        return news_items
    except Exception as e:
        # Best-effort scraper: one unreachable or unparsable site must not
        # break the whole news fetch.
        logging.error(f"Error scraping {url}: {str(e)}")
        return []
def get_company_news():
    """Scrape every site in ENERGY_COMPANIES concurrently and merge the results.

    Returns:
        A flat list of the news-item dicts returned by scrape_company_news,
        in completion order (not in ENERGY_COMPANIES order).
    """
    # Scrape each distinct URL once, keeping first-seen order.
    unique_urls = list(dict.fromkeys(ENERGY_COMPANIES))
    all_company_news = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [executor.submit(scrape_company_news, url) for url in unique_urls]
        # No sleep between results: every request is already submitted at this
        # point, so pausing here would only delay collection and would not
        # reduce the load on any server.
        for future in as_completed(futures):
            all_company_news.extend(future.result())
    return all_company_news
 def get_energy_news(query):
     """Fetch the 20 most recently published English articles matching ``query``.

     Uses the module-level ``newsapi`` client. Returns the ``articles`` list
     from the response, or an empty list (after logging) if anything fails.
     """
     try:
         payload = newsapi.get_everything(
             q=query,
             language='en',
             sort_by='publishedAt',
             page_size=20,
         )
         found = payload['articles']
     except Exception as e:
         logging.error(f"Error fetching news: {e}")
         return []
     return found
def analyze_news_item(item, query, is_company_news=False):
    """Ask the model whether a news item is relevant to ``query`` and analyse it.

    Args:
        item: News item dict. Reads ``title``, ``description`` / ``content``,
            ``url`` and ``source``. ``source`` may be a plain string (as
            produced by scrape_company_news) or a mapping with a ``name`` key.
        query: Topic the item is checked against.
        is_company_news: Copied unchanged into the result so callers can tell
            company announcements from general news.

    Returns:
        A dict with keys ``title``, ``link``, ``source``, ``analysis`` and
        ``is_company_news`` when the model's reply starts with YES (any
        letter case, optional leading markup); otherwise ``None``. ``None``
        is also returned, after logging, when the model call fails.
    """
    import re  # local import: the file's import block is not fully visible here

    # Resolve the source from its actual shape rather than from the flag, so a
    # missing, None or string source cannot raise before the try block and a
    # mapping is never passed through as the source name.
    raw_source = item.get('source')
    source = raw_source.get('name') if isinstance(raw_source, dict) else raw_source
    title = item.get('title', 'No title')
    content = item.get('description', '') or item.get('content', '') or ''
    url = item.get('url', '#')
    prompt = f"""
    Analyze the following news item in the context of the energy market:
    Query: {query}
    Source: {source}
    Title: {title}
    Content: {content}
    URL: {url}
    Is this news item directly relevant to "{query}" in the context of the energy market?
    Answer ONLY 'YES' or 'NO', followed by a brief explanation.
    If YES, provide:
    1. A concise 2-3 sentence summary of the news.
    2. Key points (up to 3 bullet points).
    3. Specific impact on the energy market related to {query} (1-2 sentences).
    """
    try:
        response = model.generate_content(prompt)
        analysis = response.text.strip()
        # Accept "YES", "Yes." or "**YES**:" but not words that merely start
        # with "yes"; the separator right after the verdict is consumed so it
        # does not lead the stored analysis.
        verdict = re.match(
            r'[\W_]*yes(?![a-z0-9])[*_]*[\s.,:;]*', analysis, re.IGNORECASE
        )
        if verdict is None:
            return None
        return {
            'title': title,
            'link': url,
            'source': source,
            'analysis': analysis[verdict.end():].strip(),
            'is_company_news': is_company_news,
        }
    except Exception as e:
        # Best effort: a failed model call drops this item only.
        logging.error(f"Error analyzing news item: {e}")
        return None
def filter_and_analyze_news(query, articles, company_news):
    """Analyse general and company news concurrently, keeping the relevant items.

    Args:
        query: Topic each item is checked against.
        articles: General news items (the list returned by get_energy_news).
        company_news: Items scraped from company sites (get_company_news).
            Either list may be empty or ``None``.

    Returns:
        The non-empty results of analyze_news_item in completion order.
        Collection stops once 20 relevant items have been gathered.
    """
    max_results = 20
    # Tag each item by the list it came from. Both lists hold dicts, so the
    # previous isinstance(item, dict) test marked every item as company news.
    tagged_items = [(item, False) for item in (articles or [])]
    tagged_items.extend((item, True) for item in (company_news or []))

    filtered_and_analyzed_news = []
    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = [
            executor.submit(analyze_news_item, item, query, from_company)
            for item, from_company in tagged_items
        ]
        for future in as_completed(futures):
            result = future.result()
            if result:
                filtered_and_analyzed_news.append(result)
            if len(filtered_and_analyzed_news) >= max_results:
                # Cancel work that has not started so leaving the with-block
                # does not wait on model calls whose results are discarded.
                for pending in futures:
                    pending.cancel()
                break
    return filtered_and_analyzed_news
     if not filtered_news:
         return f"No relevant news found for '{query}' in the energy market context."
+    general_news = [item for item in filtered_news if not item.get('is_company_news')]
+    company_news = [item for item in filtered_news if item.get('is_company_news')]
     summaries = [item.get('analysis', '') for item in filtered_news]
     combined_summary = "\n\n".join(summaries)
     prompt = f"""
+    Based on the following summaries of recent news articles and company announcements related to '{query}' in the energy market:
     {combined_summary}
+    Provide a comprehensive market summary that:
     1. Highlights the current trends and developments related to {query} in the energy market.
     2. Identifies any significant impacts or potential changes in the market.
     3. Mentions any notable events or decisions affecting this area.
+    4. Compares and contrasts information from general news sources and energy company announcements.
+    5. Identifies any discrepancies or complementary information between general news and company-specific news.
+    Keep the summary focused on factual information derived from the news articles and company announcements, without adding speculation or personal opinions.
+    Organize the summary into clear sections with appropriate subheadings.
     """
     try:
     role = data.get('role')
     file_id = data.get('file_id')
     news_context = data.get('newsContext')
+    try:
+        if news_context:
+            # Process query with news context
             prompt = f"""
+            You are an AI News Analyst specializing in the energy market. Use the following news context and your general knowledge to answer the query.
             News Context:
+            Market Summary: {news_context.get('market_summary', 'No market summary available.')}
+            Top Articles:
+            {' '.join([f"- {article['title']}: {article['summary']}" for article in news_context.get('top_articles', [])])}
+            Query: {query}
+            Provide a comprehensive answer that:
+            1. Directly addresses the query using information from the news context.
+            2. Incorporates relevant general knowledge about the energy market.
+            3. Highlights any connections or insights between the query and the recent news.
+            4. Offers a balanced perspective, considering both general news and company-specific announcements.
+            5. Suggests potential implications or future trends based on the available information.
+            Format your response with clear headings and bullet points where appropriate.
             """
             response = model.generate_content(prompt)
             return jsonify({'response': response.text})
     query = data.get('query')
     try:
         all_articles = get_energy_news(query)
+        company_news = get_company_news()
+        filtered_news = filter_and_analyze_news(query, all_articles, company_news)
         market_summary = generate_market_summary(query, filtered_news)
         # Prepare the top 10 articles with summaries
             top_articles.append({
                 'title': article.get('title', 'No title'),
                 'url': article.get('link', '#'),
+                'source': article.get('source', 'Unknown'),
+                'summary': summary,
+                'is_company_news': article.get('is_company_news', False)
             })
         return jsonify({