Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- data_processor.py +41 -0
- sentiment_analyzer.py +48 -0
- visualization.py +26 -0
data_processor.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
load_dotenv()
|
| 5 |
+
import groq
|
| 6 |
+
groq.api_key = os.getenv("GROQ_API_KEY")
|
| 7 |
+
from agno.agent import Agent
|
| 8 |
+
from agno.models.groq import Groq
|
| 9 |
+
from agno.tools.yfinance import YFinanceTools
|
| 10 |
+
from agno.tools.duckduckgo import DuckDuckGoTools
|
| 11 |
+
from agents.finance_agent import FinanceAgent
|
| 12 |
+
from agents.web_search_agent import WebSearchAgent
|
| 13 |
+
from agents.multi_agent import MultiAgent
|
| 14 |
+
import matplotlib.pyplot as plt
|
| 15 |
+
from textblob import TextBlob
|
| 16 |
+
from agents.finance_agent import FinanceAgent
|
| 17 |
+
def fetch_financial_data(ticker_symbol):
    """Return a ``yfinance.Ticker`` object for *ticker_symbol*.

    Args:
        ticker_symbol: Symbol passed straight through to ``yf.Ticker``.

    Returns:
        The ``yf.Ticker`` instance; callers read attributes such as
        ``.news`` and ``.info`` from it.
    """
    # Kept as a function-local import, exactly as in the original.
    import yfinance as yf

    return yf.Ticker(ticker_symbol)
|
| 21 |
+
|
| 22 |
+
def clean_data(data):
    """Drop rows with missing values from a DataFrame; pass other data through.

    Args:
        data: Any object. Only ``pandas.DataFrame`` instances are modified.

    Returns:
        ``data.dropna()`` (a new DataFrame) when *data* is a DataFrame,
        otherwise *data* itself, unchanged. Note that dicts are NOT cleaned:
        keys whose value is ``None`` are kept.
    """
    if isinstance(data, pd.DataFrame):
        return data.dropna()
    return data
|
| 26 |
+
|
| 27 |
+
def fetch_and_clean_news(ticker_symbol):
    """Fetch news for *ticker_symbol* and keep only items that have content.

    Args:
        ticker_symbol: Symbol forwarded to ``fetch_financial_data``.

    Returns:
        A list of the news items (dicts) whose ``'content'`` value is truthy.
        Empty when the ticker exposes no news.
    """
    ticker = fetch_financial_data(ticker_symbol)
    # Guard against a missing/None news attribute value so iteration
    # cannot fail on it.
    news_items = ticker.news or []
    # Non-dict entries have no .get(); skip them instead of raising.
    return [
        item
        for item in news_items
        if isinstance(item, dict) and item.get('content')
    ]
|
| 32 |
+
|
| 33 |
+
def extract_fundamentals(ticker_symbol):
    """Collect a small set of fundamentals for *ticker_symbol*.

    Args:
        ticker_symbol: Symbol forwarded to ``fetch_financial_data``.

    Returns:
        A dict with the keys ``longName``, ``marketCap``, ``peRatio`` and
        ``dividendYield``. Values missing from the ticker info are ``None``,
        except ``longName`` which falls back to *ticker_symbol*. The dict is
        passed through ``clean_data`` before being returned.
    """
    # Read ``.info`` once rather than once per field; fall back to an empty
    # mapping so a None value cannot break the ``.get`` calls below.
    info = fetch_financial_data(ticker_symbol).info or {}
    fundamentals = {
        "longName": info.get('longName', ticker_symbol),
        "marketCap": info.get('marketCap'),
        # NOTE(review): "peRatio" is populated from the *forward* P/E field.
        "peRatio": info.get('forwardPE'),
        "dividendYield": info.get('dividendYield'),
    }
    return clean_data(fundamentals)
|
sentiment_analyzer.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
from rapidfuzz.fuzz import partial_ratio
|
| 3 |
+
import yfinance as yf
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
_SENTIMENT_MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
_SENTIMENT_COLUMNS = ["headline", "sentiment", "score"]
_sentiment_pipeline_cache = None


def _get_sentiment_pipeline():
    """Build the sentiment pipeline on first use and reuse it afterwards."""
    global _sentiment_pipeline_cache
    if _sentiment_pipeline_cache is None:
        _sentiment_pipeline_cache = pipeline(
            "sentiment-analysis", model=_SENTIMENT_MODEL_NAME
        )
    return _sentiment_pipeline_cache


def analyze_headline_sentiment(headlines):
    """Classify each headline and return the results as a DataFrame.

    Args:
        headlines: Iterable of headline strings.

    Returns:
        A ``pandas.DataFrame`` with one row per headline and the columns
        ``headline``, ``sentiment`` (the pipeline's label) and ``score``.
        The columns are present even when *headlines* is empty, so callers
        can index ``df['sentiment']`` unconditionally.
    """
    headlines = list(headlines)
    if not headlines:
        # Nothing to classify: skip constructing the pipeline entirely.
        return pd.DataFrame(columns=_SENTIMENT_COLUMNS)

    sentiment_pipeline = _get_sentiment_pipeline()
    results = []
    for headline in headlines:
        # The pipeline returns a list of predictions; the first is used.
        sentiment = sentiment_pipeline(headline)[0]
        results.append({
            "headline": headline,
            "sentiment": sentiment['label'],
            "score": sentiment['score'],
        })
    return pd.DataFrame(results, columns=_SENTIMENT_COLUMNS)
|
| 17 |
+
|
| 18 |
+
def filter_news_by_sentiment(ticker_symbol, news_items, fuzzy_threshold=60,
                             min_items=5, max_items=7):
    """Pick the headlines from *news_items* that relate to *ticker_symbol*.

    Despite the name, no sentiment is computed here: an item is selected when
    its text contains the ticker symbol (case-insensitive substring) or when
    the ``partial_ratio`` between the company name and the text reaches
    *fuzzy_threshold*. If fewer than *min_items* headlines qualify, the result
    is topped up with the remaining items in their original order.

    Args:
        ticker_symbol: Symbol used for the substring match and to look up
            the company's ``longName`` via ``yf.Ticker(...).info``.
        news_items: Iterable of dict-like news items with a ``'content'``
            entry that is either a dict (its ``'title'`` is used) or a string.
        fuzzy_threshold: Minimum ``partial_ratio`` score for a name match.
        min_items: Top up with unmatched headlines until this many are held.
        max_items: Maximum number of headlines returned.

    Returns:
        A list of at most *max_items* distinct, non-empty headline strings.
    """
    ticker = yf.Ticker(ticker_symbol)
    # Fall back to the symbol when the name is missing or None.
    company_name = (ticker.info or {}).get('longName') or ticker_symbol

    def extract_text(item):
        content = item.get('content', '')
        if content is None:
            return ''
        if isinstance(content, dict):
            title = content.get('title')
            # A missing or None title falls back to the dict's string form.
            return str(content) if title is None else str(title)
        if isinstance(content, str):
            return content
        return str(content)

    # Extract once so both passes see the same texts.
    texts = [extract_text(item) for item in news_items]

    # Loop invariants hoisted out of the matching loop.
    symbol_lower = ticker_symbol.lower()
    name_lower = company_name.lower()

    filtered_news = []
    seen = set()
    for text in texts:
        if not text or text in seen:
            continue
        text_lower = text.lower()
        ticker_present = symbol_lower in text_lower
        similarity = partial_ratio(name_lower, text_lower)
        if ticker_present or similarity >= fuzzy_threshold:
            filtered_news.append(text)
            seen.add(text)

    if len(filtered_news) < min_items:
        for text in texts:
            if not text or text in seen:
                continue
            filtered_news.append(text)
            seen.add(text)
            if len(filtered_news) >= min_items:
                break

    return filtered_news[:max_items]
|
visualization.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def plot_historical_prices(hist_data, ticker_symbol):
    """Show a line chart of closing prices for *ticker_symbol*.

    Args:
        hist_data: Object whose ``.index`` supplies the x values and whose
            ``'Close'`` entry supplies the y values (presumably a price
            history DataFrame -- confirm against the caller).
        ticker_symbol: Used in the legend label and the chart title.

    The title always reads "(Last 6 Months)"; it is not derived from the
    span actually covered by *hist_data*.
    """
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 5))
    plt.plot(
        hist_data.index,
        hist_data['Close'],
        label=f"{ticker_symbol} Close Price",
        color="blue",
    )
    plt.title(f"{ticker_symbol} Historical Closing Prices (Last 6 Months)")
    plt.xlabel("Date")
    plt.ylabel("Price (USD)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
|
| 13 |
+
|
| 14 |
+
def plot_sentiment_analysis(sentiment_df):
    """Show a bar chart of how many headlines fall under each sentiment label.

    Args:
        sentiment_df: DataFrame with a ``'sentiment'`` column of labels.

    Bars are coloured by label (positive = green, negative = red, anything
    else = gray). ``value_counts()`` orders labels by frequency, so assigning
    a fixed colour list by position would mislabel the bars whenever
    "positive" is not the most common label.
    """
    import matplotlib.pyplot as plt

    label_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'gray'}

    sentiment_counts = sentiment_df['sentiment'].value_counts()
    # Match case-insensitively; unknown labels fall back to gray.
    bar_colors = [
        label_colors.get(str(label).lower(), 'gray')
        for label in sentiment_counts.index
    ]

    plt.figure(figsize=(8, 5))
    sentiment_counts.plot(kind='bar', color=bar_colors)
    plt.title("Sentiment Analysis of News Headlines")
    plt.xlabel("Sentiment")
    plt.ylabel("Count")
    plt.xticks(rotation=0)
    plt.grid(axis='y')
    plt.tight_layout()
    plt.show()
|