SelmaNajih001 committed on
Commit
eda1490
·
verified ·
1 Parent(s): 656a3c0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -0
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ from datasets import load_dataset
4
+ import gradio as gr
5
+ import yfinance as yf
6
+
7
# Load the "train" split of the news-sentiment dataset and keep only the rows
# whose Company is one of the five tracked names.
df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
df = df.loc[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])]

# Unparseable dates become NaT (errors="coerce"); the Year filter below then
# drops them because NaN >= 2015 is False.
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df = df.assign(
    Year=df["Date"].dt.year,
    Month=df["Date"].dt.to_period("M"),
    Day=df["Date"].dt.date,
)
df = df.loc[df["Year"] >= 2015]
15
+
16
+
17
# Display name -> Yahoo Finance symbol. The first five keys are the Company
# values kept from the dataset; "NASDAQ" is the index used as market reference.
# Insertion order matters: it is the order in which prices are downloaded.
TICKERS = {
    "Tesla": "TSLA",
    "Microsoft": "MSFT",
    "Apple": "AAPL",
    "Facebook": "META",
    "Google": "GOOGL",
    "NASDAQ": "^IXIC",
}
25
+
26
# Download daily prices for every entry of TICKERS and keep, per company, a
# two-column frame: 'Date' plus 'Close_<ticker>' ('Close_NASDAQ' for the index).
prices = {}
# The date window is the same for every ticker, so compute it once.
start_date = "2015-01-01"
end_date = pd.Timestamp.today()
for company, ticker in TICKERS.items():
    df_prices = yf.download(ticker, start=start_date, end=end_date)
    close_col = f"Close_{ticker}"
    if isinstance(df_prices.columns, pd.MultiIndex):
        # Flatten ('Close', 'TSLA') -> 'Close_TSLA'.
        df_prices.columns = [
            "_".join(str(level) for level in col).strip()
            for col in df_prices.columns
        ]
    elif "Close" in df_prices.columns:
        # Flat columns (single-level result): give the close column the same
        # 'Close_<ticker>' name so the selection below does not raise KeyError.
        df_prices = df_prices.rename(columns={"Close": close_col})
    df_prices = df_prices.reset_index()[["Date", close_col]]
    if company == "NASDAQ":
        # The index symbol '^IXIC' makes an awkward column name; use a fixed one.
        df_prices = df_prices.rename(columns={close_col: "Close_NASDAQ"})
    prices[company] = df_prices
37
+
38
def get_prices_for_agg(agg_col):
    """Return the downloaded price frames aggregated to the requested level.

    Args:
        agg_col: "Day", "Month" or "Year". Any other value leaves each frame
            un-aggregated (a renamed copy of the stored frame).

    Returns:
        dict mapping each key of ``prices`` to a two-column DataFrame:
        - "Day":   'Date' + close column, resampled to calendar days with the
                   gaps filled by linear interpolation;
        - "Month": 'Month' (timestamp of the month start) + last close of the month;
        - "Year":  'Year' + last close of the year.
    """
    aggregated = {}
    for name, raw in prices.items():
        close_col = 'Close_NASDAQ' if name == 'NASDAQ' else f"Close_{TICKERS[name]}"
        # Copy first: the branches below add helper columns to the frame.
        frame = raw.copy()
        frame = frame.rename(columns={frame.columns[1]: close_col})

        if agg_col == "Day":
            daily = frame.set_index('Date').resample('D').mean()
            frame = daily.interpolate('linear').reset_index()
        elif agg_col == "Month":
            frame['Month'] = frame['Date'].dt.to_period('M').dt.to_timestamp()
            frame = frame.groupby('Month')[close_col].last().reset_index()
        elif agg_col == "Year":
            frame['Year'] = frame['Date'].dt.year
            frame = frame.groupby('Year')[close_col].last().reset_index()

        aggregated[name] = frame
    return aggregated
56
+
57
# Left-join the daily close of every company present in the news data, and
# then the NASDAQ close, onto the news rows by 'Date'. Rows whose date has no
# price entry keep NaN in the price columns.
df_merged = df.copy()
price_frames = [
    prices[name][['Date', f"Close_{TICKERS[name]}"]]
    for name in df['Company'].unique()
]
price_frames.append(prices['NASDAQ'][['Date', 'Close_NASDAQ']])
for frame in price_frames:
    df_merged = df_merged.merge(frame, on='Date', how='left')
64
+
65
+
66
+ # --- GRADIO FUNCTION ---
67
def show_sentiment(selected_companies=None, aggregation="Day", selected_year="All"):
    """Build the sentiment table and the sentiment-vs-price figure.

    Args:
        selected_companies: A company name, a list of names, or None/empty.
            An empty selection falls back to ["NASDAQ"]. The special name
            "NASDAQ" plots the Score summed over all rows (labelled
            "General") against the NASDAQ close.
        aggregation: Grouping column name: "Day", "Month" or "Year".
        selected_year: "All" or None for no filter; any other value is
            converted with int() and matched against the 'Year' column.

    Returns:
        (DataFrame, plotly figure): the last 30 rows of the aggregated frame
        and a line chart of summed Score per group, with price traces drawn
        on a secondary right-hand axis.
    """
    if not selected_companies:
        selected_companies = ["NASDAQ"]

    # A single-select dropdown passes a bare string.
    if isinstance(selected_companies, str):
        selected_companies = [selected_companies]

    df_filtered = df_merged
    if selected_year != "All" and selected_year is not None:
        df_filtered = df_filtered[df_filtered['Year'] == int(selected_year)]
    # Copy after filtering so the column rewrites below act on a private frame
    # (never on df_merged, and never on a slice).
    df_filtered = df_filtered.copy()

    # Aggregation column
    group_col = aggregation
    if aggregation == "Month":
        # Periods -> timestamps so the column matches the price frames.
        df_filtered['Month'] = df_filtered['Month'].dt.to_timestamp()
    elif aggregation == "Day":
        df_filtered['Day'] = df_filtered['Date']

    # Prices aggregated to the same level
    prices_agg = get_prices_for_agg(aggregation)

    def _prices_on_group_col(name):
        # Daily price frames are keyed by 'Date', not 'Day'; rename so every
        # aggregation level can be merged on group_col. This is a no-op for
        # Month/Year frames, which have no 'Date' column.
        return prices_agg[name].rename(columns={'Date': group_col})

    include_nasdaq = "NASDAQ" in selected_companies
    companies_to_plot = [c for c in selected_companies if c != "NASDAQ"]

    df_grouped_list = []
    # (Company label in df_grouped, price column, legend name) per price trace.
    price_traces = []

    # Selected companies
    if companies_to_plot:
        df_sent = df_filtered[df_filtered['Company'].isin(companies_to_plot)]
        df_tmp = df_sent.groupby([group_col, 'Company']).agg({'Score': 'sum'}).reset_index()
        for c in companies_to_plot:
            if c not in TICKERS:
                continue
            ticker_col = f"Close_{TICKERS[c]}"
            df_price_col = _prices_on_group_col(c)[[group_col, ticker_col]]
            df_tmp = df_tmp.merge(df_price_col, on=group_col, how='left')
            price_traces.append((c, ticker_col, f"{c} Price"))
        df_grouped_list.append(df_tmp)

    # NASDAQ paired with the overall sentiment
    if include_nasdaq:
        df_general = df_filtered.groupby(group_col).agg({'Score': 'sum'}).reset_index()
        df_general['Company'] = 'General'
        df_general = df_general.merge(_prices_on_group_col('NASDAQ'), on=group_col, how='left')
        df_grouped_list.append(df_general)
        price_traces.append(('General', 'Close_NASDAQ', "NASDAQ Price"))

    # Combine everything
    df_grouped = pd.concat(df_grouped_list, ignore_index=True, sort=False)
    df_grouped = df_grouped.sort_values([group_col, 'Company'])

    # --- Plot ---
    fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
                  title=f"Sentiment Trend ({aggregation} Aggregation)")

    # Price traces go on the secondary axis; skip a trace with no price data.
    for label, price_col, trace_name in price_traces:
        df_c = df_grouped[df_grouped['Company'] == label]
        if price_col in df_c.columns and df_c[price_col].notnull().any():
            fig.add_scatter(
                x=df_c[group_col], y=df_c[price_col],
                mode='lines', name=trace_name, yaxis="y2",
                line=dict(dash='dot')
            )

    fig.update_layout(
        yaxis2=dict(
            title="Stock Price",
            overlaying="y",
            side="right"
        )
    )

    return df_grouped.tail(30), fig
149
+
150
+ import gradio as gr
151
+
152
+
153
# Markdown shown under the page title.
description_text = """
### Dynamic Sentiment Dashboard
This dashboard allows you to explore the sentiment of news articles related to major tech companies (Apple, Tesla, Microsoft, Meta, Alphabet) and compare it with their stock prices.
- **Dataset structure**: The dataset includes a company column; each row corresponds to a news item for a specific company.
- **Sentiment aggregation**: Select a time aggregation level (Month or Year) to see how sentiment evolves over time.
- **NASDAQ comparison**: Selecting "NASDAQ" shows the general market sentiment alongside the company-specific sentiment.
- **Visual insights**: Top-left graph shows average sentiment score and closing price for the selected company.
"""

# Markdown shown below the outputs.
findings_text = """
### Key Findings
- Some news articles refer to multiple companies, e.g., the same article may mention Apple and Tesla.
- Merging news with stock prices allows analyzing correlations between sentiment and stock movements for each company.
- **Apple (2018, 2019, 2022):** Sentiment trends generally align with closing prices, showing similar monthly patterns.
- **Tesla (2018, 2019, 2022):** More volatility observed; sentiment aligns with stock movement but is more sensitive to news on Elon Musk’s actions.
- **Microsoft, Meta, Alphabet:** Across periods, sentiment trends follow stock prices with moderate correlation.
- The custom sentiment model is more aligned with actual stock movements compared to FinBERT, which is more influenced by word positivity/negativity.
- Aggregating sentiment by month or year helps identify broader trends while reducing noise from daily fluctuations.
- Including “NASDAQ” as a general market reference allows comparison of individual companies’ sentiment with overall market sentiment.
"""
173
+
174
+
175
# Dropdown choices: the company names present in the data in sorted order,
# with the "NASDAQ" pseudo-company appended last; years in ascending order.
companies = sorted(df['Company'].unique().tolist())
companies.append("NASDAQ")
years = df['Year'].dropna().unique().tolist()
years.sort()
177
+
178
+ # --- Build Gradio Blocks ---
179
with gr.Blocks() as demo:
    # Header: title and description
    gr.Markdown("# Dynamic Sentiment Dashboard")
    gr.Markdown(description_text)

    # Input controls.
    # NOTE(review): the original indentation was lost; the three controls are
    # assumed to share this single row - confirm against the deployed layout.
    with gr.Row():
        dropdown_companies = gr.Dropdown(
            label="Select Companies",
            choices=companies,
            value=None,
            multiselect=False,
        )
        radio_aggregation = gr.Radio(
            label="Aggregation Level",
            choices=["Month", "Year"],
            value="Month",
        )
        dropdown_year = gr.Dropdown(
            label="Select Year",
            choices=["All"] + years,
            value="All",
        )

    # Submit button
    submit_btn = gr.Button("Submit")

    # Outputs: aggregated table, then the figure
    data_table = gr.Dataframe(label="Sentiment Table", type="pandas")
    sentiment_plot = gr.Plot(label="Sentiment Trend")

    # Findings section
    gr.Markdown(findings_text)

    # Recompute both outputs from the three controls when Submit is clicked.
    submit_btn.click(
        fn=show_sentiment,
        inputs=[dropdown_companies, radio_aggregation, dropdown_year],
        outputs=[data_table, sentiment_plot],
    )

if __name__ == "__main__":
    # Listen on all interfaces and surface handler errors in the UI.
    demo.launch(server_name="0.0.0.0", show_error=True)