Spaces:
Sleeping
Sleeping
| from flask import Flask, request, jsonify, send_file | |
| from bs4 import BeautifulSoup | |
| from newspaper import Article | |
| from textblob import TextBlob | |
| # from newsapi import NewsApiClient | |
| from transformers import pipeline | |
| import requests | |
| from utils import * | |
| import pandas as pd | |
| import base64 | |
| import json | |
| # import nest_asyncio | |
# Module-level Flask application object; the `__main__` guard at the bottom of
# this file starts it with `app.run(...)`.
# NOTE(review): no `@app.route` decorator is applied anywhere in the visible
# code, so the server exposes no endpoints as written — confirm intent.
app = Flask(__name__)
# newsapi = NewsApiClient(api_key='YOUR_NEWS_API_KEY')  # Replace with your API key
# Seconds to wait on any outbound HTTP call before giving up, so a stalled
# upstream cannot hang the caller forever.
_REQUEST_TIMEOUT_SECONDS = 15

# SECURITY(review): this key was hard-coded in the original source and is kept
# only as a backward-compatible fallback. Set the NEWS_API_KEY environment
# variable instead, then rotate this key and remove the literal.
_FALLBACK_NEWS_API_KEY = "7396bdb0bc0a42c5b5b0c9c5945d32fa"


def _collect_newsorg_articles(company):
    """Fetch NewsAPI articles about *company* and label each with TextBlob polarity.

    Returns a list of ``{"Title", "Summary", "Sentiment"}`` dicts, or ``None``
    when the NewsAPI request fails or its payload carries no article list.
    """
    import os

    params = {
        "q": company,  # was hard-coded to "tesla", ignoring the argument
        "apiKey": os.environ.get("NEWS_API_KEY", _FALLBACK_NEWS_API_KEY),
        "pageSize": 10,  # NewsAPI documents this parameter as `pageSize`
        "sortBy": "publishedAt",
        "language": "en",
    }
    try:
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params=params,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        payload = response.json()
    except (requests.RequestException, ValueError):
        return None
    if not isinstance(payload, dict) or "articles" not in payload:
        # Error payloads (bad key, rate limit, ...) carry no "articles" list.
        return None

    collected = []
    for article in payload["articles"] or []:
        url = article.get("url")
        if not url:
            continue
        news_article = Article(url)
        try:
            news_article.download()
            news_article.parse()
        except Exception:
            # Best effort: an article that cannot be fetched/parsed is skipped.
            continue

        polarity = TextBlob(news_article.text).sentiment.polarity
        if polarity > 0.3:
            sentiment = "POSITIVE"
        elif polarity < -0.3:
            sentiment = "NEGATIVE"
        else:
            sentiment = "NEUTRAL"

        collected.append({
            "Title": article.get("title"),
            "Summary": article.get("description"),
            "Sentiment": sentiment,
        })
    return collected


def _collect_yahoo_articles(company):
    """Scrape Yahoo Finance news for *company* and label each item via ``analyze_sentiment``.

    Returns a list of ``{"Title", "Summary", "Sentiment"}`` dicts, or ``None``
    when the page cannot be fetched (network error or non-200 status).
    """
    from urllib.parse import quote

    # Escape the caller-supplied symbol so it cannot alter the URL structure.
    url = f"https://finance.yahoo.com/quote/{quote(str(company), safe='')}/news/"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    collected = []
    # NOTE(review): these CSS class names are tied to Yahoo's current markup;
    # if the page layout changes this loop silently yields no articles.
    for news in soup.find_all("div", class_="holder yf-1napat3"):
        title_all = news.find_all('h3', class_="clamp yf-82qtw3")
        summary_all = news.find_all('p', class_="clamp yf-82qtw3")
        for title, summary in zip(title_all, summary_all):
            title_text = title.get_text()
            summary_text = summary.get_text()
            collected.append({
                "Title": title_text,
                "Summary": summary_text,
                "Sentiment": analyze_sentiment(title_text + ' ' + summary_text),
            })
    return collected


def _final_sentiment_message(company, overall_sentiment_count):
    """Return the headline verdict for *company* given the net positive-minus-negative count."""
    name = company.capitalize()
    if overall_sentiment_count > 0:
        return f"{name}'s lastest news is mostly positive. Potential stock growth expected."
    if overall_sentiment_count < 0:
        return f"{name}'s lastest news is mostly negative. Potential stock decline expected."
    return f"{name}'s lastest news is mostly neutral. Stocks going to stay stagnant for some time."


def analyze_news(company, source):
    """Build a sentiment report, with a base64 audio summary, for *company*.

    The ``/analyze_news`` route decorator was already commented out in the
    original file, so this is a plain function called with explicit arguments.

    Args:
        company: Search term for NewsAPI, or the quote symbol for Yahoo Finance.
        source: ``"NewsOrg"`` (NewsAPI + TextBlob polarity) or ``"Yahoo News"``
            (scraped headlines + ``analyze_sentiment`` from ``utils``).

    Returns:
        On success, a dict with the keys ``"Company"``, ``"Articles"``,
        ``"Comparitive Sentiment Score"``, ``"Final Sentiment Analysis"`` and
        ``"Audio"`` (base64 text of the file produced by ``generate_hindi_tts``).
        ``{}`` when the upstream news source cannot be fetched.
        A ``(jsonify(...), status)`` tuple for missing arguments (400), an
        unknown source (400), or a failed audio generation (500).

    NOTE(review): ``jsonify`` needs an active Flask application context; if
    this function is called outside one, the error paths will raise instead
    of returning — confirm how callers invoke it.
    """
    if not company or not source:
        return jsonify({"error": "Please provide a company name as a query parameter"}), 400

    if source == "NewsOrg":
        all_articles = _collect_newsorg_articles(company)
    elif source == "Yahoo News":
        all_articles = _collect_yahoo_articles(company)
    else:
        return jsonify({"error": "Invalid source provided"}), 400

    if all_articles is None:
        print("Failed to fetch news articles")
        return {}

    sentiment_count = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    overall_sentiment_count = 0
    for entry in all_articles:
        sentiment = entry["Sentiment"]
        # Tolerate a label outside the three expected ones rather than raising
        # KeyError (presumably analyze_sentiment only emits these — TODO confirm).
        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        if sentiment == "POSITIVE":
            overall_sentiment_count += 1
        elif sentiment == "NEGATIVE":
            overall_sentiment_count -= 1

    output = {"Company": f"{company}", "Articles": all_articles}
    output["Comparitive Sentiment Score"] = {
        "Sentiment Distribution": sentiment_count
    }
    output["Final Sentiment Analysis"] = _final_sentiment_message(
        company, overall_sentiment_count
    )
    print(output)
    print(f"{'>'*5} Starting text summarization.")

    # Only the first five articles feed the spoken summary. NewsAPI may return
    # null titles/descriptions, so treat None as empty text.
    text_to_summarize = " ".join(
        (entry["Title"] or "") + " " + (entry["Summary"] or "")
        for entry in all_articles[:5]
    )
    summary_final = summarize_text(text_to_summarize)
    audio_path = generate_hindi_tts(summary_final)
    if not audio_path:
        return jsonify({"error": "Failed to generate audio"}), 500

    # Embed the generated audio file as base64 text inside the report.
    with open(audio_path, "rb") as f:
        output["Audio"] = base64.b64encode(f.read()).decode('utf-8')
    return output
# Script entry point: when this file is executed directly, serve `app` with
# Flask's built-in server on port 8000 with debug mode switched on.
# NOTE(review): debug mode enables the interactive debugger and reloader and
# is meant for local development only.
if __name__ == "__main__":
    app.run(port=8000, debug=True)