# Source: Hugging Face Space file "app.py" by SelmaNajih001.
# Commit: "Update app.py" (cbfc950, verified) - raw file size 7.7 kB.
import logging

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import yfinance as yf
from datasets import load_dataset
from transformers import pipeline
# --- PARAMETERS ---
# Hugging Face Hub identifiers: the news dataset and the models loaded below.
HF_DATASET = "SelmaNajih001/FT_MultiCompany"
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
MODEL_FINBERT = "ProsusAI/finbert"

# Company name (as compared against the dataset's "Company" column)
# mapped to the ticker symbol passed to yfinance.
TICKERS = {
    "Tesla, Inc": "TSLA",
    "Microsoft": "MSFT",
}

# Company names in declaration order; reused as the dropdown choices.
companies = [*TICKERS]
# --- PIPELINES ---
# All four models are loaded once at import time and reused for every row.

# Custom sentiment model; its labels are compared against "UP"/"DOWN" later on.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=MODEL_SENTIMENT,
)

# Per-company price models; only the returned "score" is used downstream.
price_pipeline_tesla = pipeline(
    task="text-classification",
    model=MODEL_PRICE_TESLA,
)
price_pipeline_msft = pipeline(
    task="text-classification",
    model=MODEL_PRICE_MICROSOFT,
)

# FinBERT baseline; its labels are compared against "POSITIVE"/"NEGATIVE" later on.
finbert_pipeline = pipeline(
    task="sentiment-analysis",
    model=MODEL_FINBERT,
)
# --- LOAD DATASET ---
# Build the news table from the "train" split, then derive:
#   date       - parsed "Date" column (unparseable values become NaT)
#   date_merge - the same timestamp truncated to midnight, used as the join
#                key against the daily price table
# and order the rows chronologically.
df_multi = (
    pd.DataFrame(load_dataset(HF_DATASET)["train"])
    .assign(date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'))
    .assign(date_merge=lambda frame: frame['date'].dt.normalize())
    .sort_values('date')
)
# --- SENTIMENT & PREDICTION ---
logger = logging.getLogger(__name__)

# Company name -> price pipeline used for the "Predicted" column.
# Companies that are not listed here keep a prediction of 0.0.
PRICE_PIPELINES = {
    "Tesla, Inc": price_pipeline_tesla,
    "Microsoft": price_pipeline_msft,
}


def _classify(pipe, text):
    """Return ``(LABEL, score)`` for the first result of ``pipe(text)``.

    The label is upper-cased and stripped. Any failure of the pipeline is
    logged and reported as ``('ERROR', 0.0)`` so that one bad row does not
    abort the whole run. Only ``Exception`` is caught: Ctrl-C and
    ``SystemExit`` still propagate.
    """
    try:
        top = pipe(text)[0]
        return top['label'].upper().strip(), top['score']
    except Exception:
        logger.warning("Classification failed; row marked as ERROR", exc_info=True)
        return 'ERROR', 0.0


def _predict_price(company, text):
    """Return the price-model score for *text*, capped at 1.0.

    Returns 0.0 when *company* has no dedicated pipeline or when the
    pipeline raises (the failure is logged).
    """
    pipe = PRICE_PIPELINES.get(company)
    if pipe is None:
        return 0.0
    try:
        return min(pipe(text)[0]['score'], 1.0)
    except Exception:
        logger.warning("Price prediction failed for %r; using 0.0", company, exc_info=True)
        return 0.0


# Run the three models row by row (custom sentiment, FinBERT, price model -
# the same order as before) and collect the results, then write each column
# in one assignment instead of cell-by-cell updates.
_custom_results = []
_finbert_results = []
_predictions = []
for _company, _summary in zip(df_multi['Company'], df_multi['Summary']):
    _custom_results.append(_classify(sentiment_pipeline, _summary))
    _finbert_results.append(_classify(finbert_pipeline, _summary))
    _predictions.append(_predict_price(_company, _summary))

df_multi['Sentiment'] = [label for label, _ in _custom_results]
df_multi['Confidence'] = pd.Series(
    [score for _, score in _custom_results], index=df_multi.index, dtype='float64'
)
df_multi['Predicted'] = pd.Series(_predictions, index=df_multi.index, dtype='float64')
df_multi['FinBERT_Sentiment'] = [label for label, _ in _finbert_results]
df_multi['FinBERT_Confidence'] = pd.Series(
    [score for _, score in _finbert_results], index=df_multi.index, dtype='float64'
)
# --- FETCH STOCK PRICES ---
# For each company, download daily closes from its earliest news date up to
# now and store a table with columns:
#   Date_, Close_<ticker>, date_merge (midnight-normalised join key),
#   PctChangeDaily (the close-to-close change shifted up one row, so each row
#   carries the change to the following row's close).
prices = {}
for company, ticker in TICKERS.items():
    close_col = f'Close_{ticker}'
    is_company = df_multi['Company'] == company
    start_date = df_multi.loc[is_company, 'date'].min()
    end_date = pd.Timestamp.today()

    history = yf.download(ticker, start=start_date, end=end_date)
    df_prices = history[['Close']].reset_index()
    df_prices.columns = ['Date_', close_col]

    parsed_dates = pd.to_datetime(df_prices['Date_'])
    df_prices['date_merge'] = parsed_dates.dt.normalize()

    daily_change = df_prices[close_col].pct_change()
    df_prices['PctChangeDaily'] = daily_change.shift(-1)

    prices[company] = df_prices
# --- MERGE & STRATEGIES ---
def _signal_cumulative(labels, confidence, pct_change, long_label, short_label,
                       threshold=0.8):
    """Cumulative return of a label-driven long/short rule.

    For each row, the row's ``pct_change`` is added when ``labels`` equals
    *long_label*, subtracted when it equals *short_label*, and ignored
    otherwise; a signal only counts when ``confidence`` is strictly above
    *threshold*. Missing ``pct_change`` values count as 0. The first row is
    the baseline and always contributes 0.

    All three inputs are Series sharing one index; the result uses that index.
    """
    step = pct_change.fillna(0)
    confident = confidence > threshold
    go_long = confident & (labels == long_label)
    go_short = confident & (labels == short_label)

    contribution = pd.Series(0.0, index=labels.index)
    contribution.loc[go_long] = step.loc[go_long]
    contribution.loc[go_short] = -step.loc[go_short]
    if len(contribution):
        # The series starts from 0 regardless of the first row's signal.
        contribution.iloc[0] = 0.0
    return contribution.cumsum()


dfs_final = {}
for company in companies:
    if company not in prices:
        # Without a price table there is no PctChangeDaily to trade on, so the
        # company is left out of dfs_final; the plotting code skips it.
        continue

    df_c = df_multi[df_multi['Company'] == company].copy()
    # The inner join keeps only news rows whose day has a price row and
    # leaves df_c with a fresh 0..n-1 index.
    df_c = pd.merge(df_c, prices[company], on='date_merge', how='inner')

    # Grouping keys for the three aggregation levels offered in the UI.
    df_c['Day'] = df_c['date'].dt.date
    df_c['Month'] = df_c['date'].dt.to_period('M').dt.to_timestamp()
    df_c['Year'] = df_c['date'].dt.year

    # Strategy A: custom sentiment model (UP -> long, DOWN -> short).
    df_c['StrategyA_Cumulative'] = _signal_cumulative(
        df_c['Sentiment'], df_c['Confidence'], df_c['PctChangeDaily'],
        long_label="UP", short_label="DOWN",
    )

    # Strategy B: position sized by the price-model score. Missing changes
    # (e.g. the last price row after shift(-1)) count as 0, as in A and C,
    # so the cumulative series does not end in NaN.
    df_c['StrategyB_Cumulative'] = (
        df_c['Predicted'] * df_c['PctChangeDaily'].fillna(0)
    ).cumsum()

    # Strategy C: FinBERT (POSITIVE -> long, NEGATIVE -> short).
    df_c['StrategyC_Cumulative'] = _signal_cumulative(
        df_c['FinBERT_Sentiment'], df_c['FinBERT_Confidence'], df_c['PctChangeDaily'],
        long_label="POSITIVE", short_label="NEGATIVE",
    )

    dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
# --- GRADIO CALLBACK ---
def show_company_data(selected_companies, aggregation="Day"):
    """Build the strategy and price charts for the selected companies.

    Args:
        selected_companies: Iterable of company names. Names without an
            entry in ``dfs_final`` are skipped.
        aggregation: ``"Day"``, ``"Month"`` or ``"Year"``; any other value
            falls back to ``"Day"`` for grouping. Within each group the last
            value of every plotted column is used.

    Returns:
        A 3-tuple ``(preview, strategies_figure, prices_figure)`` matching the
        interface outputs. ``preview`` is ``None`` on the normal path. When
        nothing is selected, ``preview`` is a one-cell DataFrame holding the
        prompt and both figures are ``None``.
    """
    if not selected_companies:
        # The first output is a Dataframe component; a bare string there is
        # interpreted as a CSV path, so wrap the message in a DataFrame.
        message = pd.DataFrame({"Message": ["Please select at least one company"]})
        return message, None, None

    agg_col = aggregation if aggregation in ("Day", "Month", "Year") else "Day"
    strategy_labels = {
        'StrategyA_Cumulative': "Custom Sentiment",
        'StrategyB_Cumulative': "Regression",
        'StrategyC_Cumulative': "FinBERT",
    }

    fig_strat = go.Figure()
    fig_price = go.Figure()
    for c in selected_companies:
        if c not in dfs_final:
            continue
        close_col = f'Close_{TICKERS[c]}'
        # One group-by per company feeds both charts.
        df_grouped = (
            dfs_final[c]
            .groupby(agg_col)[[*strategy_labels, close_col]]
            .last()
            .reset_index()
        )

        for strat, label in strategy_labels.items():
            fig_strat.add_trace(go.Scatter(
                x=df_grouped[agg_col],
                y=df_grouped[strat],
                mode="lines",
                name=f"{c} - {label}",
            ))

        fig_price.add_trace(go.Scatter(
            x=df_grouped[agg_col],
            y=df_grouped[close_col],
            mode="lines",
            name=f"{c} Price",
        ))

    fig_strat.update_layout(
        title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
        xaxis_title=aggregation,
        yaxis_title="Cumulative Value",
        template="plotly_dark",
        hovermode="x unified",
    )
    fig_price.update_layout(
        title="Andamento Prezzi",
        xaxis_title=aggregation,
        yaxis_title="Stock Price",
        template="plotly_dark",
        hovermode="x unified",
    )
    return None, fig_strat, fig_price
# --- GRADIO INTERFACE ---
# Two inputs (company multi-select, aggregation level) feed
# show_company_data, whose 3-tuple result fills the three outputs in order.
demo = gr.Interface(
    fn=show_company_data,
    inputs=[
        gr.Dropdown(
            choices=companies,
            # Defaults must be spelled exactly like the entries in `choices`:
            # the Tesla entry is "Tesla, Inc", not "Tesla".
            value=["Microsoft", "Tesla, Inc"],
            multiselect=True,
            label="Select Companies"
        ),
        gr.Radio(
            choices=["Day", "Month", "Year"],
            value="Day",
            label="Aggregation Level"
        )
    ],
    outputs=[
        gr.Dataframe(label="Data Preview", type="pandas"),
        gr.Plot(label="Strategies"),
        gr.Plot(label="Prices")
    ],
    title="Portfolio Evolution",
    description="Compare Custom Sentiment, Regression, and FinBERT strategies alongside stock prices."
)
demo.launch()