SelmaNajih001 committed on
Commit
a3c8f1b
·
0 Parent(s):

Initial commit

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +225 -0
  4. requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SentimentAnalysis
3
+ emoji: 📈
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.46.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-sa-4.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ from datasets import load_dataset
4
+ import gradio as gr
5
+ import yfinance as yf
6
+
7
# Load the news-sentiment dataset and keep only the companies the dashboard
# covers.
df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
# .copy() makes the filtered frame independent, so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])].copy()

# Unparseable dates are coerced to NaT instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.to_period('M')
df['Day'] = df['Date'].dt.date
# Rows whose date could not be parsed have a NaN Year, fail this comparison
# and are dropped together with everything before 2015.
df = df[df['Year'] >= 2015]
15
+
16
+
17
# Display name -> Yahoo Finance symbol. "NASDAQ" is not a company in the news
# dataset; it is the index used as the general market reference.
TICKERS = {
    "Tesla": "TSLA",
    "Microsoft": "MSFT",
    "Apple": "AAPL",
    "Facebook": "META",
    "Google": "GOOGL",
    "NASDAQ": "^IXIC"
}

# The download window is identical for every ticker, so compute it once.
start_date = "2015-01-01"
end_date = pd.Timestamp.today()

# Display name -> DataFrame with columns ['Date', 'Close_<ticker>']
# (the index frame uses 'Close_NASDAQ' instead of 'Close_^IXIC').
prices = {}
for company, ticker in TICKERS.items():
    close_col = f'Close_{ticker}'
    df_prices = yf.download(ticker, start=start_date, end=end_date)
    if isinstance(df_prices.columns, pd.MultiIndex):
        # Flatten two-level columns, e.g. ('Close', 'TSLA') -> 'Close_TSLA'.
        df_prices.columns = ['_'.join(str(c) for c in col).strip() for col in df_prices.columns]
    else:
        # NOTE(review): depending on the yfinance version/options the result
        # may have flat columns ('Close', ...). Rename so the selection below
        # works in that case too instead of raising KeyError.
        df_prices = df_prices.rename(columns={'Close': close_col})
    df_prices = df_prices.reset_index()[['Date', close_col]]
    if company == "NASDAQ":
        df_prices = df_prices.rename(columns={close_col: 'Close_NASDAQ'})
    prices[company] = df_prices
37
+
38
def get_prices_for_agg(agg_col):
    """Return the downloaded closing prices re-bucketed to ``agg_col``.

    Args:
        agg_col: "Day", "Month" or "Year". Any other value leaves each
            price frame un-aggregated.

    Returns:
        dict mapping each key of ``prices`` to a DataFrame:
        - "Day": calendar-daily rows, gaps filled by linear interpolation;
          the time column keeps the name 'Date'.
        - "Month" / "Year": one row per period holding the last close of
          that period; the time column is named 'Month' / 'Year'.
    """
    aggregated = {}
    for name, raw in prices.items():
        if name == 'NASDAQ':
            close_col = 'Close_NASDAQ'
        else:
            close_col = f"Close_{TICKERS[name]}"

        # Work on a copy so the module-level price frames stay untouched.
        frame = raw.copy()
        frame = frame.rename(columns={frame.columns[1]: close_col})

        if agg_col == "Day":
            daily = frame.set_index('Date').resample('D').mean()
            frame = daily.interpolate('linear').reset_index()
        elif agg_col == "Month":
            frame['Month'] = frame['Date'].dt.to_period('M').dt.to_timestamp()
            frame = frame.groupby('Month')[close_col].last().reset_index()
        elif agg_col == "Year":
            frame['Year'] = frame['Date'].dt.year
            frame = frame.groupby('Year')[close_col].last().reset_index()

        aggregated[name] = frame
    return aggregated
56
+
57
# Attach closing prices to every news row: one Close_<ticker> column per
# company present in the dataset, then the NASDAQ index close. Left joins keep
# news rows that have no matching price date.
df_merged = df.copy()
for company in df['Company'].unique():
    ticker_col = f"Close_{TICKERS[company]}"
    df_temp = prices[company][['Date', ticker_col]]
    df_merged = pd.merge(df_merged, df_temp, on='Date', how='left')

df_merged = pd.merge(
    df_merged,
    prices['NASDAQ'][['Date', 'Close_NASDAQ']],
    on='Date',
    how='left',
)
64
+
65
+
66
+ # --- GRADIO FUNCTION ---
67
def show_sentiment(selected_companies=None, aggregation="Day", selected_year="All"):
    """Aggregate news sentiment per time bucket and overlay closing prices.

    Args:
        selected_companies: A company name, a list of names, or None/empty
            (treated as ["NASDAQ"]). "NASDAQ" selects the sentiment summed
            over all companies, labelled 'General', paired with the NASDAQ
            index close.
        aggregation: "Day", "Month" or "Year"; the column used for grouping.
        selected_year: "All" or None for no filter, otherwise a value
            convertible with ``int()``.

    Returns:
        Tuple of (last 30 rows of the aggregated table, plotly figure with
        sentiment on the left axis and prices on a secondary right axis).
    """
    if not selected_companies:
        selected_companies = ["NASDAQ"]

    # A single-select dropdown passes a bare string rather than a list.
    if isinstance(selected_companies, str):
        selected_companies = [selected_companies]

    df_filtered = df_merged
    if selected_year != "All" and selected_year is not None:
        df_filtered = df_filtered[df_filtered['Year'] == int(selected_year)]
    # Copy after filtering: columns are reassigned below, and this keeps both
    # the shared df_merged and pandas' chained-assignment checks out of it.
    df_filtered = df_filtered.copy()

    # Aggregation column
    group_col = aggregation
    if aggregation == "Month":
        # Periods -> timestamps so they match the keys of the monthly prices.
        df_filtered['Month'] = df_filtered['Month'].dt.to_timestamp()
    elif aggregation == "Day":
        # Replace the date objects with midnight timestamps so rows bucket by
        # calendar day and can be merged with the daily price frame.
        df_filtered['Day'] = df_filtered['Date'].dt.normalize()

    # Prices re-bucketed to the chosen aggregation (daily ones interpolated)
    prices_agg = get_prices_for_agg(aggregation)

    include_nasdaq = "NASDAQ" in selected_companies
    companies_to_plot = [c for c in selected_companies if c != "NASDAQ"]

    df_grouped_list = []

    # Selected companies
    if companies_to_plot:
        df_sent = df_filtered[df_filtered['Company'].isin(companies_to_plot)]
        df_tmp = df_sent.groupby([group_col, 'Company']).agg({'Score': 'sum'}).reset_index()
        for c in companies_to_plot:
            if c not in TICKERS:
                continue
            ticker_col = f"Close_{TICKERS[c]}"
            # Daily price frames name their time column 'Date'; align it with
            # group_col (a no-op for Month/Year, which have no 'Date' column).
            df_price_col = prices_agg[c].rename(columns={'Date': group_col})[[group_col, ticker_col]]
            df_tmp = df_tmp.merge(df_price_col, on=group_col, how='left')
        df_grouped_list.append(df_tmp)

    # NASDAQ paired with the sentiment summed over all companies
    if include_nasdaq:
        df_general = df_filtered.groupby(group_col).agg({'Score': 'sum'}).reset_index()
        df_general['Company'] = 'General'
        df_general = df_general.merge(
            prices_agg['NASDAQ'].rename(columns={'Date': group_col}),
            on=group_col,
            how='left',
        )
        df_grouped_list.append(df_general)

    # Combine everything
    df_grouped = pd.concat(df_grouped_list, ignore_index=True, sort=False)
    df_grouped = df_grouped.sort_values([group_col, 'Company'])

    # --- Plot ---
    fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
                  title=f"Sentiment Trend ({aggregation} Aggregation)")

    for c in companies_to_plot:
        # Same guard as the merge loop: no ticker means no price trace.
        if c not in TICKERS:
            continue
        ticker_col = f"Close_{TICKERS[c]}"
        df_c = df_grouped[df_grouped['Company'] == c]
        if ticker_col in df_c.columns and df_c[ticker_col].notnull().any():
            fig.add_scatter(
                x=df_c[group_col], y=df_c[ticker_col],
                mode='lines', name=f"{c} Price", yaxis="y2",
                line=dict(dash='dot')
            )

    if include_nasdaq:
        df_c = df_grouped[df_grouped['Company'] == 'General']
        if 'Close_NASDAQ' in df_c.columns and df_c['Close_NASDAQ'].notnull().any():
            fig.add_scatter(
                x=df_c[group_col], y=df_c['Close_NASDAQ'],
                mode='lines', name="NASDAQ Price", yaxis="y2",
                line=dict(dash='dot')
            )

    # Prices go on a secondary axis on the right, overlaid on the sentiment axis.
    fig.update_layout(
        yaxis2=dict(
            title="Stock Price",
            overlaying="y",
            side="right"
        )
    )

    return df_grouped.tail(30), fig
149
+
150
+ import gradio as gr
151
+
152
+
153
+ description_text = """
154
+ ### Dynamic Sentiment Dashboard
155
+
156
+ This dashboard allows you to explore the sentiment of news articles related to major tech companies (Apple, Tesla, Microsoft, Meta, Alphabet) and compare it with their stock prices.
157
+
158
+ - **Dataset structure**: The dataset includes a company column; each row corresponds to a news item for a specific company.
159
+ - **Sentiment aggregation**: Select a time aggregation level (Month or Year) to see how sentiment evolves over time.
160
+ - **NASDAQ comparison**: Selecting "NASDAQ" shows the general market sentiment alongside the company-specific sentiment.
161
+ - **Visual insights**: Top-left graph shows average sentiment score and closing price for the selected company.
162
+ """
163
+
164
+ findings_text = """
165
+ ### Key Findings
166
+
167
+ - Some news articles refer to multiple companies, e.g., the same article may mention Apple and Tesla.
168
+ - Merging news with stock prices allows analyzing correlations between sentiment and stock movements for each company.
169
+ - **Apple (2018, 2019, 2022):** Sentiment trends generally align with closing prices, showing similar monthly patterns.
170
+ - **Tesla (2018, 2019, 2022):** More volatility observed; sentiment aligns with stock movement but is more sensitive to news on Elon Musk’s actions.
171
+ - **Microsoft, Meta, Alphabet:** Across periods, sentiment trends follow stock prices with moderate correlation.
172
+ - The custom sentiment model is more aligned with actual stock movements compared to FinBERT, which is more influenced by word positivity/negativity.
173
+ - Aggregating sentiment by month or year helps identify broader trends while reducing noise from daily fluctuations.
174
+ - Including “NASDAQ” as a general market reference allows comparison of individual companies’ sentiment with overall market sentiment.
175
+ """
176
+
177
+
178
# Dropdown options: companies present in the dataset plus the index entry.
companies = sorted(df['Company'].unique().tolist()) + ["NASDAQ"]
# Year is float when any date failed to parse (NaT -> NaN), which would list
# the options as 2015.0, 2016.0, ...; cast to int for clean values.
years = sorted(int(y) for y in df['Year'].dropna().unique())
180
+
181
+ # --- Build Gradio Blocks ---
182
+ with gr.Blocks() as demo:
183
+ # Markdown in alto
184
+ gr.Markdown("# Dynamic Sentiment Dashboard")
185
+ gr.Markdown(description_text)
186
+
187
+
188
+ with gr.Row():
189
+ dropdown_companies = gr.Dropdown(
190
+ choices=companies,
191
+ value=None,
192
+ multiselect=False,
193
+ label="Select Companies"
194
+ )
195
+
196
+ radio_aggregation = gr.Radio(
197
+ choices=["Month", "Year"],
198
+ value="Month",
199
+ label="Aggregation Level"
200
+ )
201
+
202
+ dropdown_year = gr.Dropdown(
203
+ choices=["All"] + years,
204
+ value="All",
205
+ label="Select Year"
206
+ )
207
+
208
+ # Bottone submit
209
+ submit_btn = gr.Button("Submit")
210
+
211
+ # Output
212
+ data_table = gr.Dataframe(label="Sentiment Table", type="pandas")
213
+ sentiment_plot = gr.Plot(label="Sentiment Trend")
214
+
215
+ # Findings section
216
+ gr.Markdown(findings_text)
217
+
218
+
219
+ submit_btn.click(
220
+ fn=show_sentiment,
221
+ inputs=[dropdown_companies, radio_aggregation, dropdown_year],
222
+ outputs=[data_table, sentiment_plot]
223
+ )
224
+
225
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ datasets
3
+ selenium
4
+ transformers
5
+ plotly
6
+ gradio
7
+ torch
8
+ datetime
9
+ yfinance