Spaces:

AA-6055
/

akaike_assignment_final

Sleeping

File size: 8,184 Bytes

from flask import Flask, request, jsonify, send_file
from bs4 import BeautifulSoup
from newspaper import Article
from textblob import TextBlob
# from newsapi import NewsApiClient
from transformers import pipeline
import requests
from utils import *
import pandas as pd
import base64
import json
# import nest_asyncio

# Flask application instance; it is started by the ``__main__`` guard at the
# bottom of this file.
app = Flask(__name__)

# newsapi = NewsApiClient(api_key='YOUR_NEWS_API_KEY')  # Replace with your API key

# @app.route('/analyze_news', methods=['GET'])
# def analyze_news():
def analyze_news(company, source):
    """Build a sentiment report, with base64 audio, for a company's news.

    Articles are fetched from the requested source, each one is labelled
    POSITIVE / NEGATIVE / NEUTRAL, the labels are aggregated into an overall
    verdict, and the titles and summaries of the first five articles are
    summarised and converted to speech.

    Args:
        company: Search term for ``"NewsOrg"``. For ``"Yahoo News"`` it is
            interpolated into the Yahoo Finance quote URL (presumably a
            ticker symbol -- verify against callers).
        source: Either ``"NewsOrg"`` or ``"Yahoo News"``.

    Returns:
        * On success, a dict with the keys ``"Company"``, ``"Articles"``,
          ``"Comparitive Sentiment Score"``, ``"Final Sentiment Analysis"``
          and ``"Audio"`` (base64 text of the generated audio file).
        * ``{}`` when the news source answers with a non-200 status.
        * A ``(response, status)`` tuple built with ``jsonify`` when an
          argument is missing (400), the source is unknown (400) or no audio
          path is produced (500).

    NOTE(review): ``jsonify`` normally needs an active Flask application
    context; confirm that callers invoke this function inside one.
    """
    if not company or not source:
        return jsonify({"error": "Please provide a company name as a query parameter"}), 400

    if source == "NewsOrg":
        all_articles = _collect_newsorg_articles(company)
    elif source == "Yahoo News":
        all_articles = _collect_yahoo_articles(company)
    else:
        return jsonify({"error": "Invalid source provided"}), 400

    if all_articles is None:
        print("Failed to fetch news articles")
        return {}

    output = _build_report(company, all_articles)

    if source == "NewsOrg":
        # Progress output is emitted for this source only.
        print(output)
        print(f"{'>'*5} Starting text summarization.")

    return _attach_audio(output, all_articles)


# Upper bound, in seconds, for each outbound HTTP request so that a stalled
# server cannot block the caller forever.
_REQUEST_TIMEOUT_SECONDS = 10


def _collect_newsorg_articles(company):
    """Return sentiment-labelled articles from NewsAPI, or None on HTTP failure."""
    import os

    params = {
        "q": company,
        # SECURITY: the fallback key is committed in source and should be
        # rotated; set the NEWS_API_KEY environment variable to override it.
        "apiKey": os.environ.get("NEWS_API_KEY", "7396bdb0bc0a42c5b5b0c9c5945d32fa"),
        "pageSize": 10,
        "sortBy": "publishedAt",
        "language": "en",
    }
    response = requests.get(
        url="https://newsapi.org/v2/everything",
        params=params,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        return None
    payload = json.loads(response.text)

    collected = []
    for article in payload.get("articles") or []:
        url = article.get("url")
        if not url:
            continue
        try:
            news_article = Article(url)
            news_article.download()
            news_article.parse()
        except Exception as exc:
            # Best effort: one unreachable or unparsable article must not
            # abort the whole report.
            print(f"Skipping article {url}: {exc}")
            continue

        # The label comes from the full article text, while the report
        # stores the title and description supplied by the API.
        polarity = TextBlob(news_article.text).sentiment.polarity
        if polarity > 0.3:
            sentiment = "POSITIVE"
        elif polarity < -0.3:
            sentiment = "NEGATIVE"
        else:
            sentiment = "NEUTRAL"

        collected.append({
            "Title": article.get("title"),
            "Summary": article.get("description"),
            "Sentiment": sentiment,
        })
    return collected


def _collect_yahoo_articles(company):
    """Return sentiment-labelled articles scraped from Yahoo Finance, or None on HTTP failure."""
    url = f"https://finance.yahoo.com/quote/{company}/news/"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    collected = []
    for news in soup.find_all("div", class_="holder yf-1napat3"):
        title_all = news.find_all('h3', class_="clamp yf-82qtw3")
        summary_all = news.find_all('p', class_="clamp yf-82qtw3")
        for title, summary in zip(title_all, summary_all):
            title_text = title.get_text()
            summary_text = summary.get_text()
            # Each scraped item is analysed and recorded exactly once.
            # NOTE(review): analyze_sentiment is not defined in this file
            # (presumably from utils); it is assumed to return a label
            # such as "POSITIVE", "NEGATIVE" or "NEUTRAL" -- confirm.
            collected.append({
                "Title": title_text,
                "Summary": summary_text,
                "Sentiment": analyze_sentiment(title_text + ' ' + summary_text),
            })
    return collected


def _build_report(company, articles):
    """Aggregate per-article sentiment labels into the report dict."""
    sentiment_count = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    for entry in articles:
        label = entry["Sentiment"]
        # .get keeps an unexpected label from raising KeyError.
        sentiment_count[label] = sentiment_count.get(label, 0) + 1

    # Net score: every positive article adds one, every negative removes one.
    overall_sentiment_count = sentiment_count["POSITIVE"] - sentiment_count["NEGATIVE"]
    if overall_sentiment_count > 0:
        verdict = f"{company.capitalize()}'s lastest news is mostly positive. Potential stock growth expected."
    elif overall_sentiment_count < 0:
        verdict = f"{company.capitalize()}'s lastest news is mostly negative. Potential stock decline expected."
    else:
        verdict = f"{company.capitalize()}'s lastest news is mostly neutral. Stocks going to stay stagnant for some time."

    return {
        "Company": f"{company}",
        "Articles": articles,
        "Comparitive Sentiment Score": {
            "Sentiment Distribution": sentiment_count
        },
        "Final Sentiment Analysis": verdict,
    }


def _attach_audio(output, articles):
    """Summarise the first five articles, synthesise speech and add it to output."""
    # Titles and descriptions may be None, so fall back to empty strings
    # instead of failing on str + None.
    text_to_summarize = " ".join(
        (entry["Title"] or "") + " " + (entry["Summary"] or "")
        for entry in articles[:5]
    )
    # NOTE(review): summarize_text and generate_hindi_tts are not defined in
    # this file (presumably from utils); generate_hindi_tts is assumed to
    # return a readable file path or a falsy value -- confirm.
    summary_final = summarize_text(text_to_summarize)
    audio_path = generate_hindi_tts(summary_final)
    if not audio_path:
        return jsonify({"error": "Failed to generate audio"}), 500

    # The audio file is embedded as base64 text.
    with open(audio_path, "rb") as f:
        output["Audio"] = base64.b64encode(f.read()).decode('utf-8')
    return output

# Script entry point: start Flask's built-in server on port 8000 with debug
# mode enabled when this file is executed directly.
if __name__ == "__main__":
    app.run(port=8000, debug=True)