Spaces:

AA-6055
/

akaike_assignment_final

Sleeping

App Files Files Community

akaike_assignment_final / api.py

AA-6055

Update api.py

9a59367 verified 9 months ago

raw

history blame contribute delete

8.18 kB

	from flask import Flask, request, jsonify, send_file
	from bs4 import BeautifulSoup
	from newspaper import Article
	from textblob import TextBlob
	# from newsapi import NewsApiClient
	from transformers import pipeline
	import requests
	from utils import *
	import pandas as pd
	import base64
	import json
	# import nest_asyncio

	# Flask application object for this module. In the visible code no route is
	# registered on it (the `@app.route` decorator further down is commented out).
	app = Flask(__name__)

	# newsapi = NewsApiClient(api_key='YOUR_NEWS_API_KEY') # Replace with your API key

	# @app.route('/analyze_news', methods=['GET'])
	# def analyze_news():
	# TextBlob polarity strictly beyond +/- this value is labelled POSITIVE / NEGATIVE.
	_POLARITY_THRESHOLD = 0.3
	# Upper bound (seconds) on each outbound HTTP call so a stalled server cannot hang the caller.
	_REQUEST_TIMEOUT_SECONDS = 30


	def _sentiment_verdict(company, net_score):
	    """Return the closing verdict sentence for *company* given the net sentiment score."""
	    name = company.capitalize()
	    if net_score > 0:
	        return f"{name}'s lastest news is mostly positive. Potential stock growth expected."
	    if net_score < 0:
	        return f"{name}'s lastest news is mostly negative. Potential stock decline expected."
	    return f"{name}'s lastest news is mostly neutral. Stocks going to stay stagnant for some time."


	def _fetch_newsorg_articles(company):
	    """Query NewsAPI for *company* and label each downloadable article via TextBlob polarity."""
	    # NOTE(review): the API key is committed in source; it should be moved to
	    # configuration / an environment variable and rotated.
	    params = {
	        "q": company,  # was hard-coded to "tesla", ignoring the requested company
	        "apiKey": "7396bdb0bc0a42c5b5b0c9c5945d32fa",
	        "pageSize": 10,  # NewsAPI documents this parameter as `pageSize`
	        "sortBy": "publishedAt",
	        "language": "en",
	    }
	    response = requests.get(
	        url="https://newsapi.org/v2/everything",
	        params=params,
	        timeout=_REQUEST_TIMEOUT_SECONDS,
	    )
	    payload = json.loads(response.text)

	    scored = []
	    for article in payload["articles"]:
	        url = article.get("url")
	        news_article = Article(url)
	        try:
	            news_article.download()
	            news_article.parse()
	        except Exception as exc:
	            # Best effort: an article that cannot be fetched or parsed is skipped,
	            # but Ctrl-C / SystemExit are no longer swallowed.
	            print(f"Skipping article {url}: {exc}")
	            continue

	        polarity = TextBlob(news_article.text).sentiment.polarity
	        if polarity > _POLARITY_THRESHOLD:
	            sentiment = "POSITIVE"
	        elif polarity < -_POLARITY_THRESHOLD:
	            sentiment = "NEGATIVE"
	        else:
	            sentiment = "NEUTRAL"

	        scored.append({
	            "Title": article.get("title"),
	            "Summary": article.get("description"),
	            "Sentiment": sentiment,
	        })
	    return scored


	def _fetch_yahoo_articles(company):
	    """Scrape the Yahoo Finance news page for *company*; return None on a non-200 response."""
	    url = f"https://finance.yahoo.com/quote/{company}/news/"
	    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}
	    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)

	    if response.status_code != 200:
	        print("Failed to fetch news articles")
	        return None

	    soup = BeautifulSoup(response.content, 'html.parser')
	    scored = []
	    for news in soup.find_all("div", class_="holder yf-1napat3"):
	        title_all = news.find_all('h3', class_="clamp yf-82qtw3")
	        summary_all = news.find_all('p', class_="clamp yf-82qtw3")
	        for title, summary in zip(title_all, summary_all):
	            title_text = title.get_text()
	            summary_text = summary.get_text()
	            scored.append({
	                "Title": title_text,
	                "Summary": summary_text,
	                # Sentiment is computed on headline and teaser together.
	                "Sentiment": analyze_sentiment(title_text + ' ' + summary_text),
	            })
	    return scored


	def analyze_news(company, source):
	    """Build a sentiment report, with a base64-encoded audio summary, for a company's news.

	    Args:
	        company: Company name (NewsOrg search term) or the symbol used in the
	            Yahoo Finance quote URL.
	        source: Either "NewsOrg" or "Yahoo News".

	    Returns:
	        On success, a dict with the keys "Company", "Articles",
	        "Comparitive Sentiment Score", "Final Sentiment Analysis" and "Audio".
	        An empty dict when the Yahoo page does not answer with HTTP 200.
	        A ``(jsonify(...), status)`` tuple for a missing argument (400), an
	        unknown source (400) or a failed audio generation (500).

	    NOTE(review): `jsonify` normally needs an active Flask application context,
	    and this function is not registered as a route - confirm callers provide one.
	    `summarize_text`, `generate_hindi_tts` and `analyze_sentiment` are presumably
	    supplied by `from utils import *`; verify.
	    """
	    if not company or not source:
	        return jsonify({"error": "Please provide a company name as a query parameter"}), 400

	    if source == "NewsOrg":
	        scored = _fetch_newsorg_articles(company)
	    elif source == "Yahoo News":
	        scored = _fetch_yahoo_articles(company)
	        if scored is None:
	            return {}
	    else:
	        return jsonify({"error": "Invalid source provided"}), 400

	    all_articles = []
	    output = {"Company": f"{company}", "Articles": all_articles}
	    sentiment_count = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}

	    for entry in scored:
	        sentiment_count[entry["Sentiment"]] += 1
	    all_articles.extend(scored)

	    output["Comparitive Sentiment Score"] = {
	        "Sentiment Distribution": sentiment_count
	    }
	    # Net score: +1 per positive article, -1 per negative one, neutral ignored.
	    net_score = sentiment_count["POSITIVE"] - sentiment_count["NEGATIVE"]
	    output["Final Sentiment Analysis"] = _sentiment_verdict(company, net_score)

	    print(output)
	    print(f"{'>'*5} Starting text summarization.")

	    # Only the first five articles feed the summary. The keys are "Title" and
	    # "Summary" (capitalised) for both sources, and either value may be None
	    # when it comes from the NewsAPI payload.
	    text_to_summarize = " ".join(
	        f"{d['Title'] or ''} {d['Summary'] or ''}" for d in all_articles[:5]
	    )
	    summary_final = summarize_text(text_to_summarize)

	    audio_path = generate_hindi_tts(summary_final)
	    if not audio_path:
	        return jsonify({"error": "Failed to generate audio"}), 500

	    # Embed the generated audio file as base64 text so the report stays JSON-friendly.
	    with open(audio_path, "rb") as f:
	        output["Audio"] = base64.b64encode(f.read()).decode('utf-8')

	    return output

	# Script entry point: when the module is executed directly, start Flask's
	# built-in server on port 8000 with debug mode switched on.
	# NOTE(review): debug=True is hard-coded - confirm this entry point is only
	# used for local development.
	if __name__ == '__main__':
	    app.run(debug=True, port=8000)