Spaces:

AA-6055
/

akaike_assignment

Configuration error

App Files Files Community

anuragRisksek committed on Mar 23, 2025

Commit

192f98f

1 Parent(s): b74087d

added all files

Browse files

Files changed (6) hide show

README.md +0 -12
__pycache__/utils.cpython-310.pyc +0 -0
api.py +150 -0
app.py +33 -0
requirements.txt +10 -0
utils.py +35 -0

README.md CHANGED Viewed

@@ -1,12 +0,0 @@
----
-title: Akaike Assignment
-emoji: 🐢
-colorFrom: green
-colorTo: yellow
-sdk: streamlit
-sdk_version: 1.43.2
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (1.18 kB). View file

api.py ADDED Viewed

	@@ -0,0 +1,150 @@

+from flask import Flask, request, jsonify, send_file
+from bs4 import BeautifulSoup
+from newspaper import Article
+from textblob import TextBlob
+from newsapi import NewsApiClient
+from transformers import pipeline
+import requests
+from utils import *
+import pandas as pd
+import base64
+app = Flask(__name__)  # Flask application the /analyze_news route is registered on
+newsapi = NewsApiClient(api_key='YOUR_NEWS_API_KEY')  # Replace with your API key; client is created once at import time
+@app.route('/analyze_news', methods=['GET'])
+def analyze_news():
+    company = request.args.get('company')
+    source = request.args.get('company')
+    if not company or not source:
+        return jsonify({"error": "Please provide a company name as a query parameter"}), 400
+    all_articles = []
+    output = {"Company": f"{company}", "Articles": all_articles}
+    if source == "NewsOrg":
+        # Fetch articles from News API
+        response = newsapi.get_everything(q=company, page_size=5, sort_by='publishedAt', language='en')
+        results = []
+        sentiment_count = {"Positive": 0, "Negative": 0, "Neutral": 0}
+        for idx, article in enumerate(response['articles']):
+            url = article.get("url")
+            news_article = Article(url)
+            try:
+                news_article.download()
+                news_article.parse()
+            except:
+                continue
+            blob = TextBlob(news_article.text)
+            polarity = blob.sentiment.polarity
+            if polarity > 0.3:
+                sentiment = "Positive"
+            elif polarity < -0.3:
+                sentiment = "Negative"
+            else:
+                sentiment = "Neutral"
+            sentiment_count[sentiment] += 1
+            results.append({
+                "title": article.get("title"),
+                "author": article.get("author"),
+                "summary": article.get("description"),
+                "sentiment": sentiment,
+                "url": article.get("url")
+            })
+        return jsonify({
+            "company": company,
+            "sentiment_distribution": sentiment_count,
+            "articles": results
+        })
+    elif source == "Yahoo News":
+        url = f"https://finance.yahoo.com/quote/{company}/news/"
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'}
+        response = requests.get(url, headers=headers)
+        if response.status_code != 200:
+            print("Failed to fetch news articles")
+            return {}
+        paragraphs = []
+        titles = []
+        summaries = []
+        soup = BeautifulSoup(response.content, 'html.parser')
+        overall_sentiment_count = 0
+        positive_sentiment_count = 0
+        negative_sentiment_count = 0
+        neutral_sentiment_count = 0
+        for news in soup.find_all("div", class_="holder yf-1napat3"):
+            title_all = news.find_all('h3', class_="clamp yf-82qtw3")
+            summary_all = news.find_all('p', class_="clamp yf-82qtw3")
+            for title, summary in zip(title_all, summary_all):
+                title_text = title.get_text()
+                summary_text = summary.get_text()
+                paragraph = title_text + ' ' + summary_text
+                titles.append(title_text)
+                summaries.append(summary_text)
+                paragraphs.append(paragraph)
+            # Analyze sentiment and prepare the output
+            for i, paragraph in enumerate(paragraphs):
+                sentiment = analyze_sentiment(paragraph)
+                if sentiment == "POSITIVE":
+                    positive_sentiment_count += 1
+                    overall_sentiment_count += 1
+                elif sentiment == "NEGATIVE":
+                    negative_sentiment_count += 1
+                    overall_sentiment_count -= 1
+                else:
+                    neutral_sentiment_count += 1
+                # top_words =
+            article = {
+                "Title": titles[i],
+                "Summary": summaries[i],
+                "Sentiment": sentiment
+            }
+            all_articles.append(article)
+        output["Comparitive Sentiment Score"]["Sentiment Distribution"] = {
+            "Positive": positive_sentiment_count,
+            "Negative": negative_sentiment_count,
+            "Neutral": neutral_sentiment_count,
+        }
+        if overall_sentiment_count>0:
+            output["Final Sentiment Analysis"] = f"{company.capitalize()}'s lastest news is mostly positive. Potential stock growth expected."
+        elif overall_sentiment_count<0:
+            output["Final Sentiment Analysis"] = f"{company.capitalize()}'s lastest news is mostly negative. Potential stock decline expected."
+        else:
+            output["Final Sentiment Analysis"] = f"{company.capitalize()}'s lastest news is mostly neutral. Stocks going to stay stagnant for some time."
+        df = pd.DataFrame(article)
+        text_to_summarize = " ".join([d['Title'] + " " + d['summary'] for d in article[:5]])
+        summary_final = summarize_text(text_to_summarize)
+        audio_path = generate_hindi_tts(summary_final)
+        if audio_path:# and os.path.exists(audio_path):
+            # Convert audio file to base64
+            with open(audio_path, "rb") as f:
+                audio_base64 = base64.b64encode(f.read()).decode('utf-8')
+            output["Audio"] = audio_base64
+            return output
+        else:
+            return jsonify({"error": "Failed to generate audio"}), 500
+if __name__ == '__main__':  # only when api.py is executed directly
+    app.run(debug=True, port=8000)  # serves on port 8000 with Flask debug mode on; NOTE(review): confirm debug=True is intended outside local development

app.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import streamlit as st
+import requests
+st.title("Company News Sentiment Analyzer")
+company = st.text_input("Enter Company Name", placeholder="Example: Tesla")
+source =  st.selectbox(
+    "Select the source you want news from: ", ("NewsOrg", "Yahoo News")
+)
+if st.button("Fetch News & Analyze"):
+    if not company or not source:
+        st.error("Please enter a company name! or select the source")
+    else:
+        with st.spinner("Fetching from API..."):
+            api_url = f"http://localhost:8000/analyze_news?company={company}&source={source}"
+            response = requests.get(api_url)
+            if response.status_code == 200:
+                data = response.json()
+                st.subheader(f"Sentiment Distribution for {company}")
+                st.json(data["sentiment_distribution"])
+                st.subheader("Articles:")
+                for idx, article in enumerate(data["articles"]):
+                    st.write(f"**{idx+1}. Title:** {article['title']}")
+                    st.write(f"**Sentiment:** {article['sentiment']}")
+                    st.write(f"**Summary:** {article['summary']}")
+                    st.write(f"[Read Full Article]({article['url']})")
+                    st.markdown("---")
+            else:
+                st.error("Failed to fetch data from API.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+flask
+streamlit
+beautifulsoup4
+torch
+transformers
+gtts
+newspaper3k
+newsapi-python  # provides the `newsapi` module imported by api.py
+pandas
+textblob
+requests

utils.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from transformers import pipeline
+from gtts import gTTS
+# Loading models at module level (import time); the helper functions below reuse these pipelines
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn") # Load summarizer (facebook/bart-large-cnn checkpoint)
+sentiment_analyzer = pipeline("sentiment-analysis") # Load sentiment analyzer (no model argument, so the library's default is used)
+# classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli") # Load classifier
+def analyze_sentiment(text):
+    result = sentiment_analyzer(text[:500])[0]
+    return result['label']
+def summarize_text(text):
+    cleaned_text = text.strip().replace("\n", " ")
+    cleaned_text = cleaned_text[:3000]  # Limit to avoid token overflow
+    result = summarizer(
+        cleaned_text,
+        max_length=130,
+        min_length=30,
+        do_sample=False
+    )
+    summary_text = result[0]['summary_text']
+    return summary_text
+def generate_hindi_tts(text, filename="output.mp3"):
+    try:
+        tts = gTTS(text=text, lang='hi')
+        tts.save(filename)
+        print(f"Hindi audio saved to {filename}")
+        return filename
+    except Exception as e:
+        print(f"Error in generating the TTS: {e}")
+        return None