anuragRisksek committed on
Commit
192f98f
·
1 Parent(s): b74087d

added all files

Browse files
Files changed (6) hide show
  1. README.md +0 -12
  2. __pycache__/utils.cpython-310.pyc +0 -0
  3. api.py +150 -0
  4. app.py +33 -0
  5. requirements.txt +10 -0
  6. utils.py +35 -0
README.md CHANGED
@@ -1,12 +0,0 @@
1
- ---
2
- title: Akaike Assignment
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.43.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
__pycache__/utils.cpython-310.pyc ADDED
Binary file (1.18 kB). View file
 
api.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import os
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
from flask import Flask, request, jsonify, send_file
from newsapi import NewsApiClient
from newspaper import Article
from textblob import TextBlob
from transformers import pipeline

from utils import *
11
+
12
app = Flask(__name__)

# The News API key is read from the NEWS_API_KEY environment variable so the
# secret never has to be committed. The original placeholder is kept as the
# fallback so importing the module still works without configuration.
newsapi = NewsApiClient(api_key=os.environ.get("NEWS_API_KEY", "YOUR_NEWS_API_KEY"))
15
+
16
@app.route('/analyze_news', methods=['GET'])
def analyze_news():
    """Analyze the sentiment of recent news about a company.

    Query parameters:
        company: Company name (for "NewsOrg") or quote symbol (for "Yahoo News").
        source: Either "NewsOrg" or "Yahoo News".

    Returns:
        A JSON response. The "NewsOrg" source yields the keys ``company``,
        ``sentiment_distribution`` and ``articles``; the "Yahoo News" source
        yields ``Company``, ``Articles``, ``Comparitive Sentiment Score``,
        ``Final Sentiment Analysis`` and ``Audio`` (base64-encoded MP3).
        Missing parameters or an unknown source produce a 400 error payload.
    """
    company = request.args.get('company')
    # The source used to be read from the 'company' parameter by mistake,
    # which made the requested source unreachable.
    source = request.args.get('source')
    if not company or not source:
        return jsonify({"error": "Please provide both 'company' and 'source' as query parameters"}), 400

    if source == "NewsOrg":
        return _analyze_newsorg(company)
    if source == "Yahoo News":
        return _analyze_yahoo_news(company)

    # Previously an unknown source fell through and returned None.
    return jsonify({"error": f"Unsupported source: {source}"}), 400


def _analyze_newsorg(company):
    """Fetch up to five recent News API articles and score them with TextBlob."""
    try:
        response = newsapi.get_everything(q=company, page_size=5, sort_by='publishedAt', language='en')
    except Exception:
        # Request boundary: report upstream failures instead of crashing.
        app.logger.exception("News API request failed for %r", company)
        return jsonify({"error": "Failed to fetch news articles"}), 502

    results = []
    sentiment_count = {"Positive": 0, "Negative": 0, "Neutral": 0}

    for article in response.get('articles') or []:
        url = article.get("url")
        if not url:
            continue

        news_article = Article(url)
        try:
            news_article.download()
            news_article.parse()
        except Exception as exc:
            # Best effort: an article that cannot be fetched/parsed is skipped.
            app.logger.warning("Skipping article %s: %s", url, exc)
            continue

        polarity = TextBlob(news_article.text).sentiment.polarity
        if polarity > 0.3:
            sentiment = "Positive"
        elif polarity < -0.3:
            sentiment = "Negative"
        else:
            sentiment = "Neutral"
        sentiment_count[sentiment] += 1

        results.append({
            "title": article.get("title"),
            "author": article.get("author"),
            "summary": article.get("description"),
            "sentiment": sentiment,
            "url": url,
        })

    return jsonify({
        "company": company,
        "sentiment_distribution": sentiment_count,
        "articles": results,
    })


def _analyze_yahoo_news(company):
    """Scrape Yahoo Finance headlines, score them, and attach a Hindi audio summary."""
    # The company value is untrusted input; escape it before placing it in the path.
    url = f"https://finance.yahoo.com/quote/{quote(company, safe='')}/news/"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}
    try:
        response = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        app.logger.warning("Yahoo News request failed: %s", exc)
        return jsonify({"error": "Failed to fetch news articles"}), 502

    if response.status_code != 200:
        # Previously this returned an empty payload with a 200 status.
        app.logger.warning("Yahoo News returned HTTP %s", response.status_code)
        return jsonify({"error": "Failed to fetch news articles"}), 502

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE: these CSS class names are copied from the page markup and will
    # silently match nothing if Yahoo changes its layout.
    all_articles = []
    for news in soup.find_all("div", class_="holder yf-1napat3"):
        title_all = news.find_all('h3', class_="clamp yf-82qtw3")
        summary_all = news.find_all('p', class_="clamp yf-82qtw3")
        for title, summary in zip(title_all, summary_all):
            all_articles.append({
                "Title": title.get_text(),
                "Summary": summary.get_text(),
            })

    if not all_articles:
        return jsonify({"error": "No news articles found"}), 404

    positive_sentiment_count = 0
    negative_sentiment_count = 0
    neutral_sentiment_count = 0
    for article in all_articles:
        sentiment = analyze_sentiment(article["Title"] + ' ' + article["Summary"])
        if sentiment == "POSITIVE":
            positive_sentiment_count += 1
        elif sentiment == "NEGATIVE":
            negative_sentiment_count += 1
        else:
            neutral_sentiment_count += 1
        article["Sentiment"] = sentiment

    output = {
        "Company": company,
        "Articles": all_articles,
        # The parent key must exist before the distribution is stored; the
        # original nested assignment raised KeyError.
        "Comparitive Sentiment Score": {
            "Sentiment Distribution": {
                "Positive": positive_sentiment_count,
                "Negative": negative_sentiment_count,
                "Neutral": neutral_sentiment_count,
            },
        },
    }

    overall_sentiment_count = positive_sentiment_count - negative_sentiment_count
    if overall_sentiment_count > 0:
        output["Final Sentiment Analysis"] = f"{company.capitalize()}'s lastest news is mostly positive. Potential stock growth expected."
    elif overall_sentiment_count < 0:
        output["Final Sentiment Analysis"] = f"{company.capitalize()}'s lastest news is mostly negative. Potential stock decline expected."
    else:
        output["Final Sentiment Analysis"] = f"{company.capitalize()}'s lastest news is mostly neutral. Stocks going to stay stagnant for some time."

    # Summarize the first five articles (the original indexed the last
    # article dict instead of the article list, and used the wrong key case).
    text_to_summarize = " ".join(
        f"{article['Title']} {article['Summary']}" for article in all_articles[:5]
    )
    summary_final = summarize_text(text_to_summarize)

    audio_path = generate_hindi_tts(summary_final)
    if not audio_path:
        return jsonify({"error": "Failed to generate audio"}), 500

    # Ship the audio inline as base64 so the client needs no second request.
    with open(audio_path, "rb") as f:
        output["Audio"] = base64.b64encode(f.read()).decode('utf-8')

    return jsonify(output)
149
if __name__ == '__main__':
    # Flask's debug mode enables the interactive Werkzeug debugger, which can
    # execute arbitrary code; make it opt-in (FLASK_DEBUG=1) instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled, port=8000)
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+
4
import base64  # script-local: decodes the optional audio payload from the API

# Endpoint of the Flask backend defined in api.py.
API_URL = "http://localhost:8000/analyze_news"

st.title("Company News Sentiment Analyzer")

company = st.text_input("Enter Company Name", placeholder="Example: Tesla")
source = st.selectbox(
    "Select the source you want news from: ", ("NewsOrg", "Yahoo News")
)

if st.button("Fetch News & Analyze"):
    if not company or not source:
        st.error("Please enter a company name! or select the source")
    else:
        response = None
        with st.spinner("Fetching from API..."):
            try:
                # Pass values via `params` so spaces and '&' are URL-encoded;
                # the generous timeout leaves room for model inference.
                response = requests.get(
                    API_URL,
                    params={"company": company, "source": source},
                    timeout=300,
                )
            except requests.RequestException as exc:
                st.error(f"Could not reach the analysis API: {exc}")

        if response is not None and response.status_code == 200:
            data = response.json()

            # The two backends answer with differently-cased keys; accept both.
            distribution = data.get("sentiment_distribution")
            if distribution is None:
                distribution = data.get("Comparitive Sentiment Score", {}).get(
                    "Sentiment Distribution", {}
                )
            articles = data.get("articles") or data.get("Articles") or []

            st.subheader(f"Sentiment Distribution for {company}")
            st.json(distribution)

            final_analysis = data.get("Final Sentiment Analysis")
            if final_analysis:
                st.write(final_analysis)

            audio_b64 = data.get("Audio")
            if audio_b64:
                st.audio(base64.b64decode(audio_b64), format="audio/mp3")

            st.subheader("Articles:")
            for idx, article in enumerate(articles, start=1):
                title = article.get("title") or article.get("Title")
                sentiment = article.get("sentiment") or article.get("Sentiment")
                summary = article.get("summary") or article.get("Summary")
                link = article.get("url")

                st.write(f"**{idx}. Title:** {title}")
                st.write(f"**Sentiment:** {sentiment}")
                st.write(f"**Summary:** {summary}")
                if link:  # scraped Yahoo articles carry no URL
                    st.write(f"[Read Full Article]({link})")
                st.markdown("---")
        elif response is not None:
            st.error("Failed to fetch data from API.")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
flask
streamlit
beautifulsoup4
torch
transformers
gtts
newspaper3k
# newspaper3k imports lxml.html.clean, which recent lxml releases ship as a separate package
lxml_html_clean
# provides the `newsapi` module imported by api.py
newsapi-python
pandas
textblob
requests
utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from gtts import gTTS
3
+
4
# Models are loaded once at import time and shared by the helpers below.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # Load summarizer

# Pin the checkpoint explicitly: relying on the pipeline's implicit default
# triggers a warning and may change between transformers releases. This
# checkpoint only emits the labels "POSITIVE" and "NEGATIVE".
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)  # Load sentiment analyzer
# classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli") # Load classifier
8
+
9
def analyze_sentiment(text):
    """Return the label the module-level sentiment pipeline assigns to *text*.

    Only the first 500 characters of *text* are passed to the model.
    """
    snippet = text[:500]
    predictions = sentiment_analyzer(snippet)
    top_prediction = predictions[0]
    return top_prediction['label']
12
+
13
def summarize_text(text):
    """Return an abstractive summary of *text* using the module-level summarizer.

    Newlines are flattened to spaces and the input is capped at 3000
    characters before summarization. Empty, whitespace-only or ``None``
    input yields an empty string instead of being sent to the model.
    """
    if not text:
        return ""

    cleaned_text = text.strip().replace("\n", " ")
    cleaned_text = cleaned_text[:3000]  # Limit to avoid token overflow
    if not cleaned_text:
        # Nothing left to summarize after cleaning.
        return ""

    result = summarizer(
        cleaned_text,
        max_length=130,
        min_length=30,
        do_sample=False,
        # The character cap is only a heuristic; also truncate at the token
        # level so over-long inputs cannot exceed the model's context window.
        truncation=True,
    )

    return result[0]['summary_text']
26
+
27
def generate_hindi_tts(text, filename="output.mp3"):
    """Synthesize *text* with gTTS using the Hindi voice and save it to *filename*.

    Returns the filename on success. Any exception raised during synthesis
    or saving is printed and ``None`` is returned instead.
    """
    saved_path = None
    try:
        gTTS(text=text, lang='hi').save(filename)
        print(f"Hindi audio saved to {filename}")
        saved_path = filename
    except Exception as err:
        print(f"Error in generating the TTS: {err}")
    return saved_path