Update app.py
Browse files
app.py
CHANGED
|
@@ -7,10 +7,11 @@ import gradio as gr
|
|
| 7 |
from huggingface_hub import login
|
| 8 |
import os
|
| 9 |
|
|
|
|
| 10 |
token = os.getenv("HF_TOKEN")
|
| 11 |
login(token=token)
|
| 12 |
|
| 13 |
-
|
| 14 |
HF_DATASET = "SelmaNajih001/FT_MultiCompany"
|
| 15 |
HF_PRIVATE_DATASET = "SelmaNajih001/portfolio_strategy_data"
|
| 16 |
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
|
|
@@ -24,11 +25,13 @@ TICKERS = {
|
|
| 24 |
}
|
| 25 |
companies = list(TICKERS.keys())
|
| 26 |
|
|
|
|
| 27 |
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
|
| 28 |
price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
|
| 29 |
price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
|
| 30 |
finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
|
| 31 |
|
|
|
|
| 32 |
df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
|
| 33 |
df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
|
| 34 |
df_multi['date_merge'] = df_multi['date'].dt.normalize()
|
|
@@ -40,60 +43,64 @@ try:
|
|
| 40 |
except:
|
| 41 |
df_existing = pd.DataFrame()
|
| 42 |
|
|
|
|
| 43 |
if not df_existing.empty:
|
| 44 |
df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])]
|
| 45 |
else:
|
| 46 |
df_to_add = df_multi.copy()
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
|
| 55 |
-
print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")
|
| 56 |
|
| 57 |
-
|
| 58 |
-
df_multi = df_updated.copy()
|
| 59 |
-
df_multi['Sentiment'] = ""
|
| 60 |
-
df_multi['Confidence'] = 0.0
|
| 61 |
-
df_multi['Predicted'] = 0.0
|
| 62 |
-
df_multi['FinBERT_Sentiment'] = ""
|
| 63 |
-
df_multi['FinBERT_Confidence'] = 0.0
|
| 64 |
-
|
| 65 |
-
for i, row in df_multi.iterrows():
|
| 66 |
company = row['Company']
|
| 67 |
|
| 68 |
# Custom sentiment
|
| 69 |
try:
|
| 70 |
res = sentiment_pipeline(row['Summary'])[0]
|
| 71 |
-
|
| 72 |
-
|
| 73 |
except:
|
| 74 |
-
|
| 75 |
-
|
| 76 |
|
| 77 |
# FinBERT
|
| 78 |
try:
|
| 79 |
res_f = finbert_pipeline(row['Summary'])[0]
|
| 80 |
-
|
| 81 |
-
|
| 82 |
except:
|
| 83 |
-
|
| 84 |
-
|
| 85 |
|
| 86 |
-
# Regression
|
| 87 |
try:
|
| 88 |
if company == "Tesla, Inc.":
|
| 89 |
val = price_pipeline_tesla(row['Summary'])[0]['score']
|
| 90 |
-
|
| 91 |
elif company == "Microsoft":
|
| 92 |
val = price_pipeline_msft(row['Summary'])[0]['score']
|
| 93 |
-
|
| 94 |
except:
|
| 95 |
-
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
prices = {}
|
| 99 |
for company, ticker in TICKERS.items():
|
|
@@ -116,38 +123,34 @@ for company in companies:
|
|
| 116 |
df_c['Month'] = df_c['date'].dt.to_period('M').dt.to_timestamp()
|
| 117 |
df_c['Year'] = df_c['date'].dt.year
|
| 118 |
|
| 119 |
-
# Strategy A
|
| 120 |
df_c['StrategyA_Cumulative'] = 0.0
|
| 121 |
for i in range(1, len(df_c)):
|
| 122 |
pct = df_c.loc[i, 'PctChangeDaily'] if pd.notnull(df_c.loc[i,'PctChangeDaily']) else 0
|
| 123 |
-
price = df_c.loc[i-1, f'Close_{TICKERS[company]}']
|
| 124 |
-
|
| 125 |
if df_c.loc[i, 'Sentiment'] == "UP" and df_c.loc[i,'Confidence'] > 0.8:
|
| 126 |
df_c.loc[i,'StrategyA_Cumulative'] = df_c.loc[i-1,'StrategyA_Cumulative'] + price * pct
|
| 127 |
elif df_c.loc[i, 'Sentiment'] == "DOWN" and df_c.loc[i,'Confidence'] > 0.8:
|
| 128 |
df_c.loc[i,'StrategyA_Cumulative'] = df_c.loc[i-1,'StrategyA_Cumulative'] - price * pct
|
| 129 |
else:
|
| 130 |
df_c.loc[i,'StrategyA_Cumulative'] = df_c.loc[i-1,'StrategyA_Cumulative']
|
| 131 |
-
# Strategy B
|
| 132 |
df_c['StrategyB_Cumulative'] = 0.0
|
| 133 |
for i in range(1, len(df_c)):
|
| 134 |
pct = df_c.loc[i, 'PctChangeDaily'] if pd.notnull(df_c.loc[i,'PctChangeDaily']) else 0
|
| 135 |
price = df_c.loc[i-1, f'Close_{TICKERS[company]}']
|
| 136 |
predicted = df_c.loc[i, 'Predicted']
|
| 137 |
-
|
| 138 |
if predicted > 1:
|
| 139 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative'] + price * pct
|
| 140 |
elif predicted < -1:
|
| 141 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative'] - price * pct
|
| 142 |
else:
|
| 143 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
|
| 144 |
-
|
| 145 |
-
# Strategy C: FinBERT
|
| 146 |
df_c['StrategyC_Cumulative'] = 0.0
|
| 147 |
for i in range(1, len(df_c)):
|
| 148 |
pct = df_c.loc[i, 'PctChangeDaily'] if pd.notnull(df_c.loc[i,'PctChangeDaily']) else 0
|
| 149 |
price = df_c.loc[i-1, f'Close_{TICKERS[company]}']
|
| 150 |
-
|
| 151 |
if df_c.loc[i, 'FinBERT_Sentiment'] == "POSITIVE" and df_c.loc[i,'FinBERT_Confidence'] > 0.8:
|
| 152 |
df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative'] + price * pct
|
| 153 |
elif df_c.loc[i, 'FinBERT_Sentiment'] == "NEGATIVE" and df_c.loc[i,'FinBERT_Confidence'] > 0.8:
|
|
@@ -157,14 +160,13 @@ for company in companies:
|
|
| 157 |
|
| 158 |
dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
|
| 159 |
|
| 160 |
-
# ---
|
| 161 |
def show_company_data(selected_companies, aggregation="Day"):
|
| 162 |
if not selected_companies:
|
| 163 |
return pd.DataFrame(), None, None
|
| 164 |
|
| 165 |
agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
|
| 166 |
|
| 167 |
-
# --- Figures ---
|
| 168 |
fig_strat = go.Figure()
|
| 169 |
fig_price = go.Figure()
|
| 170 |
dfs_display = []
|
|
@@ -223,7 +225,7 @@ def show_company_data(selected_companies, aggregation="Day"):
|
|
| 223 |
df_display = pd.concat(dfs_display, ignore_index=True) if dfs_display else pd.DataFrame()
|
| 224 |
return df_display, fig_strat, fig_price
|
| 225 |
|
| 226 |
-
# ---
|
| 227 |
description_text = """
|
| 228 |
### Portfolio Strategy Comparison Dashboard
|
| 229 |
This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
|
|
|
|
| 7 |
from huggingface_hub import login
|
| 8 |
import os
|
| 9 |
|
| 10 |
+
# Authenticate with Hugging Face using the HF_TOKEN environment variable
|
| 11 |
token = os.getenv("HF_TOKEN")
|
| 12 |
login(token=token)
|
| 13 |
|
| 14 |
+
# --- Constants ---
|
| 15 |
HF_DATASET = "SelmaNajih001/FT_MultiCompany"
|
| 16 |
HF_PRIVATE_DATASET = "SelmaNajih001/portfolio_strategy_data"
|
| 17 |
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
|
|
|
|
| 25 |
}
|
| 26 |
companies = list(TICKERS.keys())
|
| 27 |
|
| 28 |
+
# --- Pipelines ---
|
| 29 |
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
|
| 30 |
price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
|
| 31 |
price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
|
| 32 |
finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
|
| 33 |
|
| 34 |
+
# --- Dataset loading ---
|
| 35 |
df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
|
| 36 |
df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
|
| 37 |
df_multi['date_merge'] = df_multi['date'].dt.normalize()
|
|
|
|
| 43 |
except:
|
| 44 |
df_existing = pd.DataFrame()
|
| 45 |
|
| 46 |
+
# --- Determine which rows are new ---
# Rows of df_multi whose 'Date' value is not yet present in the stored dataset
# are treated as new.
# NOTE(review): matching is on 'Date' alone. If several companies can share the
# same 'Date' value, a new row for one company is dropped as soon as any stored
# row has that date -- confirm whether the key should also include 'Company'.
if not df_existing.empty:
    # .copy() gives an independent frame: the column assignments below must not
    # write into a slice of df_multi (chained-assignment hazard).
    df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])].copy()
else:
    df_to_add = df_multi.copy()

# --- Score only the new rows ---
# Default values; each row is overwritten below when its pipeline call succeeds.
df_to_add['Sentiment'] = ""
df_to_add['Confidence'] = 0.0
df_to_add['Predicted'] = 0.0
df_to_add['FinBERT_Sentiment'] = ""
df_to_add['FinBERT_Confidence'] = 0.0

# Company name -> price pipeline. Companies without an entry keep the default
# 'Predicted' value of 0.0.
price_pipelines = {
    "Tesla, Inc.": price_pipeline_tesla,
    "Microsoft": price_pipeline_msft,
}

for i, row in df_to_add.iterrows():
    company = row['Company']

    # Custom sentiment model: store upper-cased label and its score.
    try:
        res = sentiment_pipeline(row['Summary'])[0]
        df_to_add.at[i, 'Sentiment'] = res['label'].upper().strip()
        df_to_add.at[i, 'Confidence'] = res['score']
    except Exception as exc:
        # Best effort: one failing row must not abort the whole run, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f"Sentiment pipeline failed for row {i}: {exc}")
        df_to_add.at[i, 'Sentiment'] = 'ERROR'
        df_to_add.at[i, 'Confidence'] = 0.0

    # FinBERT: same layout as the custom sentiment columns.
    try:
        res_f = finbert_pipeline(row['Summary'])[0]
        df_to_add.at[i, 'FinBERT_Sentiment'] = res_f['label'].upper().strip()
        df_to_add.at[i, 'FinBERT_Confidence'] = res_f['score']
    except Exception as exc:
        print(f"FinBERT pipeline failed for row {i}: {exc}")
        df_to_add.at[i, 'FinBERT_Sentiment'] = 'ERROR'
        df_to_add.at[i, 'FinBERT_Confidence'] = 0.0

    # Regression: company-specific price model.
    try:
        price_pipeline = price_pipelines.get(company)
        if price_pipeline is not None:
            val = price_pipeline(row['Summary'])[0]['score']
            # NOTE(review): max() floors the stored value at 1.0, so the
            # `predicted < -1` branch of Strategy B further down this file can
            # never fire for these rows -- confirm the intended clamp.
            df_to_add.at[i, 'Predicted'] = max(val, 1.0)
    except Exception as exc:
        print(f"Price pipeline failed for row {i}: {exc}")
        df_to_add.at[i, 'Predicted'] = 0.0

# --- Merge the new rows into the existing dataset ---
if not df_existing.empty:
    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)
else:
    df_updated = df_to_add.copy()

# --- Push to Hugging Face ---
hf_dataset_updated = Dataset.from_pandas(df_updated)
hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")

# --- Rest of the code (prices, strategies, Gradio) works on the merged data ---
df_multi = df_updated.copy()
|
| 104 |
|
| 105 |
prices = {}
|
| 106 |
for company, ticker in TICKERS.items():
|
|
|
|
| 123 |
df_c['Month'] = df_c['date'].dt.to_period('M').dt.to_timestamp()
|
| 124 |
df_c['Year'] = df_c['date'].dt.year
|
| 125 |
|
| 126 |
+
# Strategy A
|
| 127 |
df_c['StrategyA_Cumulative'] = 0.0
|
| 128 |
for i in range(1, len(df_c)):
|
| 129 |
pct = df_c.loc[i, 'PctChangeDaily'] if pd.notnull(df_c.loc[i,'PctChangeDaily']) else 0
|
| 130 |
+
price = df_c.loc[i-1, f'Close_{TICKERS[company]}']
|
|
|
|
| 131 |
if df_c.loc[i, 'Sentiment'] == "UP" and df_c.loc[i,'Confidence'] > 0.8:
|
| 132 |
df_c.loc[i,'StrategyA_Cumulative'] = df_c.loc[i-1,'StrategyA_Cumulative'] + price * pct
|
| 133 |
elif df_c.loc[i, 'Sentiment'] == "DOWN" and df_c.loc[i,'Confidence'] > 0.8:
|
| 134 |
df_c.loc[i,'StrategyA_Cumulative'] = df_c.loc[i-1,'StrategyA_Cumulative'] - price * pct
|
| 135 |
else:
|
| 136 |
df_c.loc[i,'StrategyA_Cumulative'] = df_c.loc[i-1,'StrategyA_Cumulative']
|
| 137 |
+
# Strategy B
|
| 138 |
df_c['StrategyB_Cumulative'] = 0.0
|
| 139 |
for i in range(1, len(df_c)):
|
| 140 |
pct = df_c.loc[i, 'PctChangeDaily'] if pd.notnull(df_c.loc[i,'PctChangeDaily']) else 0
|
| 141 |
price = df_c.loc[i-1, f'Close_{TICKERS[company]}']
|
| 142 |
predicted = df_c.loc[i, 'Predicted']
|
|
|
|
| 143 |
if predicted > 1:
|
| 144 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative'] + price * pct
|
| 145 |
elif predicted < -1:
|
| 146 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative'] - price * pct
|
| 147 |
else:
|
| 148 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
|
| 149 |
+
# Strategy C
|
|
|
|
| 150 |
df_c['StrategyC_Cumulative'] = 0.0
|
| 151 |
for i in range(1, len(df_c)):
|
| 152 |
pct = df_c.loc[i, 'PctChangeDaily'] if pd.notnull(df_c.loc[i,'PctChangeDaily']) else 0
|
| 153 |
price = df_c.loc[i-1, f'Close_{TICKERS[company]}']
|
|
|
|
| 154 |
if df_c.loc[i, 'FinBERT_Sentiment'] == "POSITIVE" and df_c.loc[i,'FinBERT_Confidence'] > 0.8:
|
| 155 |
df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative'] + price * pct
|
| 156 |
elif df_c.loc[i, 'FinBERT_Sentiment'] == "NEGATIVE" and df_c.loc[i,'FinBERT_Confidence'] > 0.8:
|
|
|
|
| 160 |
|
| 161 |
dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
|
| 162 |
|
| 163 |
+
# --- Gradio function ---
|
| 164 |
def show_company_data(selected_companies, aggregation="Day"):
|
| 165 |
if not selected_companies:
|
| 166 |
return pd.DataFrame(), None, None
|
| 167 |
|
| 168 |
agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
|
| 169 |
|
|
|
|
| 170 |
fig_strat = go.Figure()
|
| 171 |
fig_price = go.Figure()
|
| 172 |
dfs_display = []
|
|
|
|
| 225 |
df_display = pd.concat(dfs_display, ignore_index=True) if dfs_display else pd.DataFrame()
|
| 226 |
return df_display, fig_strat, fig_price
|
| 227 |
|
| 228 |
+
# --- Gradio Interface ---
|
| 229 |
description_text = """
|
| 230 |
### Portfolio Strategy Comparison Dashboard
|
| 231 |
This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
|