Spaces:
Sleeping
Sleeping
Commit
·
a3c8f1b
0
Parent(s):
Initial commit
Browse files- .gitattributes +35 -0
- README.md +13 -0
- app.py +225 -0
- requirements.txt +9 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: SentimentAnalysis
|
| 3 |
+
emoji: 📈
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.46.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: cc-by-sa-4.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import plotly.express as px
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import yfinance as yf
|
| 6 |
+
|
| 7 |
+
# Load the news-sentiment dataset and keep only the companies the dashboard covers.
df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice (SettingWithCopyWarning on pandas < 3).
df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])].copy()

# Unparseable dates become NaT; their Year is NaN, so the >= 2015 filter drops them.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.to_period('M')
df['Day'] = df['Date'].dt.date
df = df[df['Year'] >= 2015]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Dashboard name -> Yahoo Finance symbol. "NASDAQ" is the composite index.
TICKERS = {
    "Tesla": "TSLA",
    "Microsoft": "MSFT",
    "Apple": "AAPL",
    "Facebook": "META",
    "Google": "GOOGL",
    "NASDAQ": "^IXIC"
}

# Download window is the same for every ticker, so compute it once.
start_date = "2015-01-01"
end_date = pd.Timestamp.today()

# prices[name] is a two-column frame: 'Date' and 'Close_<ticker>'
# ('Close_NASDAQ' for the index).
prices = {}
for company, ticker in TICKERS.items():
    df_prices = yf.download(ticker, start=start_date, end=end_date)
    if isinstance(df_prices.columns, pd.MultiIndex):
        # Flatten (field, ticker) column tuples into "field_ticker" names.
        df_prices.columns = ['_'.join(str(c) for c in col).strip() for col in df_prices.columns]
    else:
        # Flat columns ("Close", "Open", ...) get the same "_<ticker>" suffix
        # so the selection below works for both column layouts.
        df_prices = df_prices.add_suffix(f"_{ticker}")
    df_prices = df_prices.reset_index()[['Date', f'Close_{ticker}']]
    if company == "NASDAQ":
        df_prices = df_prices.rename(columns={f'Close_{ticker}': 'Close_NASDAQ'})
    prices[company] = df_prices
|
| 37 |
+
|
| 38 |
+
def get_prices_for_agg(agg_col):
    """Return every downloaded price series at the requested granularity.

    Args:
        agg_col: "Day", "Month" or "Year". Any other value leaves each frame
            un-aggregated (columns 'Date' and the close column).

    Returns:
        dict with the same keys as the module-level ``prices``. For the three
        supported values each frame has two columns: one named ``agg_col``
        and the close column ('Close_NASDAQ' for the "NASDAQ" key,
        'Close_<ticker>' otherwise).
    """
    df_prices_agg = {}
    for company, df_price in prices.items():
        df_temp = df_price.copy()
        col = 'Close_NASDAQ' if company == 'NASDAQ' else f"Close_{TICKERS[company]}"
        df_temp = df_temp.rename(columns={df_temp.columns[1]: col})

        if agg_col == "Day":
            # Resample to calendar days and fill non-trading days linearly.
            df_temp = df_temp.set_index('Date').resample('D').mean().interpolate('linear').reset_index()
            # Key the frame by the aggregation name, like the Month/Year
            # branches, so callers can merge on agg_col directly.
            df_temp = df_temp.rename(columns={'Date': 'Day'})
        elif agg_col == "Month":
            # Last close of each month, keyed by the month's first timestamp.
            df_temp['Month'] = df_temp['Date'].dt.to_period('M').dt.to_timestamp()
            df_temp = df_temp.groupby('Month')[col].last().reset_index()
        elif agg_col == "Year":
            # Last close of each calendar year.
            df_temp['Year'] = df_temp['Date'].dt.year
            df_temp = df_temp.groupby('Year')[col].last().reset_index()

        df_prices_agg[company] = df_temp
    return df_prices_agg
|
| 56 |
+
|
| 57 |
+
# Attach the daily close of every company present in the news data, then the
# NASDAQ close, to the news rows. Left joins on the exact 'Date' value keep
# every news row even when no price exists for that date.
price_frames = [
    prices[name][['Date', f"Close_{TICKERS[name]}"]]
    for name in df['Company'].unique()
]
price_frames.append(prices['NASDAQ'][['Date', 'Close_NASDAQ']])

df_merged = df.copy()
for price_frame in price_frames:
    df_merged = df_merged.merge(price_frame, on='Date', how='left')
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# --- GRADIO FUNCTION ---
|
| 67 |
+
def show_sentiment(selected_companies=None, aggregation="Day", selected_year="All"):
    """Build the sentiment table and the sentiment-vs-price chart.

    Args:
        selected_companies: A company name, a list of names, or None/empty.
            "NASDAQ" selects the sentiment summed over all companies
            (labelled 'General') together with the NASDAQ close. Names with
            no entry in TICKERS are ignored. When nothing usable is
            selected, "NASDAQ" is used.
        aggregation: Name of the grouping column: "Day", "Month" or "Year".
        selected_year: "All" or None for every year, otherwise the year to keep.

    Returns:
        Tuple of (last 30 rows of the grouped frame, Plotly figure).
    """
    if not selected_companies:
        selected_companies = ["NASDAQ"]
    if isinstance(selected_companies, str):
        selected_companies = [selected_companies]

    df_filtered = df_merged.copy()
    if selected_year != "All" and selected_year is not None:
        # Copy the slice so the column assignments below never target a view.
        df_filtered = df_filtered[df_filtered['Year'] == int(selected_year)].copy()

    # Aggregation column: make it directly comparable with the price frames.
    group_col = aggregation
    if aggregation == "Month":
        df_filtered['Month'] = df_filtered['Month'].dt.to_timestamp()
    elif aggregation == "Day":
        df_filtered['Day'] = df_filtered['Date']

    # Prices at the same granularity (daily ones are interpolated).
    prices_agg = get_prices_for_agg(aggregation)

    def _price_frame(name):
        # A price frame may be keyed by 'Date' instead of the aggregation
        # name; renaming is a no-op when there is no 'Date' column.
        return prices_agg[name].rename(columns={'Date': group_col})

    include_nasdaq = "NASDAQ" in selected_companies
    # Drop unknown names once (and duplicates) so every TICKERS lookup below
    # is safe and no price column is merged twice.
    companies_to_plot = list(dict.fromkeys(
        c for c in selected_companies if c != "NASDAQ" and c in TICKERS
    ))
    if not companies_to_plot and not include_nasdaq:
        # Nothing usable was selected: same fallback as an empty selection.
        include_nasdaq = True

    df_grouped_list = []

    # Selected companies: summed score per period plus each company's close.
    if companies_to_plot:
        df_sent = df_filtered[df_filtered['Company'].isin(companies_to_plot)]
        df_tmp = df_sent.groupby([group_col, 'Company']).agg({'Score': 'sum'}).reset_index()
        for c in companies_to_plot:
            ticker_col = f"Close_{TICKERS[c]}"
            df_tmp = df_tmp.merge(_price_frame(c)[[group_col, ticker_col]], on=group_col, how='left')
        df_grouped_list.append(df_tmp)

    # NASDAQ: score summed over every company, paired with the index close.
    if include_nasdaq:
        df_general = df_filtered.groupby(group_col).agg({'Score': 'sum'}).reset_index()
        df_general['Company'] = 'General'
        df_general = df_general.merge(_price_frame('NASDAQ'), on=group_col, how='left')
        df_grouped_list.append(df_general)

    # Combine everything into one long frame.
    df_grouped = pd.concat(df_grouped_list, ignore_index=True, sort=False)
    df_grouped = df_grouped.sort_values([group_col, 'Company'])

    # --- Plot ---
    fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
                  title=f"Sentiment Trend ({aggregation} Aggregation)")

    # (value in the 'Company' column, price column, legend name) per price trace.
    price_traces = [(c, f"Close_{TICKERS[c]}", f"{c} Price") for c in companies_to_plot]
    if include_nasdaq:
        price_traces.append(('General', 'Close_NASDAQ', "NASDAQ Price"))

    for company_label, price_col, trace_name in price_traces:
        df_c = df_grouped[df_grouped['Company'] == company_label]
        # Skip the trace when no price could be matched for this selection.
        if price_col in df_c.columns and df_c[price_col].notnull().any():
            fig.add_scatter(
                x=df_c[group_col], y=df_c[price_col],
                mode='lines', name=trace_name, yaxis="y2",
                line=dict(dash='dot')
            )

    # Prices go on a secondary axis on the right, overlaid on the score axis.
    fig.update_layout(
        yaxis2=dict(
            title="Stock Price",
            overlaying="y",
            side="right"
        )
    )

    return df_grouped.tail(30), fig
|
| 149 |
+
|
| 150 |
+
import gradio as gr
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# Intro shown at the top of the dashboard. The last bullet describes the
# single chart the app renders: summed score plus dotted price line.
description_text = """
### Dynamic Sentiment Dashboard

This dashboard allows you to explore the sentiment of news articles related to major tech companies (Apple, Tesla, Microsoft, Meta, Alphabet) and compare it with their stock prices.

- **Dataset structure**: The dataset includes a company column; each row corresponds to a news item for a specific company.
- **Sentiment aggregation**: Select a time aggregation level (Month or Year) to see how sentiment evolves over time.
- **NASDAQ comparison**: Selecting "NASDAQ" shows the general market sentiment alongside the company-specific sentiment.
- **Visual insights**: The chart shows the summed sentiment score (left axis) and the closing price (dotted line, right axis) for the selected company.
"""
|
| 163 |
+
|
| 164 |
+
findings_text = """
|
| 165 |
+
### Key Findings
|
| 166 |
+
|
| 167 |
+
- Some news articles refer to multiple companies, e.g., the same article may mention Apple and Tesla.
|
| 168 |
+
- Merging news with stock prices allows analyzing correlations between sentiment and stock movements for each company.
|
| 169 |
+
- **Apple (2018, 2019, 2022):** Sentiment trends generally align with closing prices, showing similar monthly patterns.
|
| 170 |
+
- **Tesla (2018, 2019, 2022):** More volatility observed; sentiment aligns with stock movement but is more sensitive to news on Elon Musk’s actions.
|
| 171 |
+
- **Microsoft, Meta, Alphabet:** Across periods, sentiment trends follow stock prices with moderate correlation.
|
| 172 |
+
- The custom sentiment model is more aligned with actual stock movements compared to FinBERT, which is more influenced by word positivity/negativity.
|
| 173 |
+
- Aggregating sentiment by month or year helps identify broader trends while reducing noise from daily fluctuations.
|
| 174 |
+
- Including “NASDAQ” as a general market reference allows comparison of individual companies’ sentiment with overall market sentiment.
|
| 175 |
+
"""
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# Dropdown choices. "NASDAQ" is not a dataset company; it selects the
# aggregate ('General') view.
companies = sorted(df['Company'].unique().tolist()) + ["NASDAQ"]
# Year is float-typed whenever the column ever held NaN; cast to int so the
# dropdown lists 2015 rather than 2015.0.
years = sorted(int(y) for y in df['Year'].dropna().unique())

# --- Build Gradio Blocks ---
with gr.Blocks() as demo:
    # Title and intro at the top
    gr.Markdown("# Dynamic Sentiment Dashboard")
    gr.Markdown(description_text)

    with gr.Row():
        dropdown_companies = gr.Dropdown(
            choices=companies,
            value=None,
            multiselect=False,
            label="Select Companies"
        )

        radio_aggregation = gr.Radio(
            choices=["Month", "Year"],
            value="Month",
            label="Aggregation Level"
        )

        dropdown_year = gr.Dropdown(
            choices=["All"] + years,
            value="All",
            label="Select Year"
        )

    # Submit button
    submit_btn = gr.Button("Submit")

    # Outputs
    data_table = gr.Dataframe(label="Sentiment Table", type="pandas")
    sentiment_plot = gr.Plot(label="Sentiment Trend")

    # Findings section
    gr.Markdown(findings_text)

    # Recompute the table and chart from the three controls on click.
    submit_btn.click(
        fn=show_sentiment,
        inputs=[dropdown_companies, radio_aggregation, dropdown_year],
        outputs=[data_table, sentiment_plot]
    )

demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
datasets
|
| 3 |
+
selenium
|
| 4 |
+
transformers
|
| 5 |
+
plotly
|
| 6 |
+
gradio
|
| 7 |
+
torch
|
| 8 |
+
datetime
|
| 9 |
+
yfinance
|