# zainulabedin949's picture
# Update app.py
# 8591c69 verified
import gradio as gr
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
# Rebinds the `pdr_override` attribute on the yfinance module to False; price
# data in this file is fetched with yf.download (see fetch_price_data).
# NOTE(review): presumably meant to opt out of the pandas-datareader override;
# confirm this assignment has any effect on the installed yfinance version.
yf.pdr_override = False # we use yf.download directly
def fetch_price_data(tickers, period, interval):
tickers = [t.strip().upper() for t in tickers.split(",") if t.strip()]
if len(tickers) == 0:
raise ValueError("Please provide at least one ticker.")
data = yf.download(tickers, period=period, interval=interval, group_by='ticker', progress=False, threads=True)
if data.empty:
return {}
result = {}
# multiindex when >1 ticker
if isinstance(data.columns, pd.MultiIndex):
for t in tickers:
if t in data.columns.levels[0]:
df = data[t].copy().dropna(how='all')
if not df.empty:
df = df.reset_index().rename(columns={"index": "Date"})
result[t] = df
else:
df = data.copy().dropna(how='all')
if not df.empty:
df = df.reset_index().rename(columns={"index": "Date"})
# if multiple tickers requested but single returned, map each ticker to same df
if len(tickers) == 1:
result[tickers[0]] = df
else:
# try to infer columns have ticker suffixes (rare). fallback: use first ticker
result[tickers[0]] = df
return result
def compute_returns(df, price_col="Close"):
    """Return simple period-over-period returns of ``df[price_col]``.

    Args:
        df: Price frame. When it has a ``"Date"`` column the returns are
            indexed by it; otherwise the frame's own index is kept.
        price_col: Name of the price column to difference.

    Returns:
        pandas Series of ``pct_change()`` values with the leading NaN (and any
        other NaN) dropped.
    """
    if "Date" in df.columns and price_col != "Date":
        # Index by date rather than by row position, so that callers which
        # combine several tickers' returns align on the calendar and plots
        # get a time axis instead of 0..n-1.
        prices = df.set_index("Date")[price_col]
    else:
        prices = df[price_col]
    return prices.pct_change().dropna()
def make_summary_table(price_dfs, rf_rate, periods_per_year=252):
    """Build a per-ticker table of return, volatility and Sharpe statistics.

    Args:
        price_dfs: dict mapping ticker -> price DataFrame accepted by
            ``compute_returns``.
        rf_rate: Annual risk-free rate as a fraction (0.02 for 2%).
        periods_per_year: Number of return observations per year used to
            annualise. Defaults to 252, the value previously hard-coded.

    Returns:
        DataFrame indexed by ``ticker`` with columns ``obs``, ``daily_mean``,
        ``daily_std``, ``ann_return``, ``ann_vol`` and ``sharpe``. The
        ``daily_*`` columns are per-observation figures. An empty
        ``price_dfs`` yields an empty table with the same columns.
    """
    columns = ["ticker", "obs", "daily_mean", "daily_std",
               "ann_return", "ann_vol", "sharpe"]
    rows = []
    for ticker, df in price_dfs.items():
        returns = compute_returns(df)
        mean = returns.mean()
        std = returns.std(ddof=0)  # population standard deviation
        ann_return = (1 + mean) ** periods_per_year - 1
        ann_vol = std * np.sqrt(periods_per_year)
        # A zero volatility would divide by zero; substitute a tiny epsilon.
        sharpe = (ann_return - rf_rate) / (ann_vol or 1e-9)
        rows.append({
            "ticker": ticker,
            "obs": returns.shape[0],
            "daily_mean": float(mean),
            "daily_std": float(std),
            "ann_return": float(ann_return),
            "ann_vol": float(ann_vol),
            "sharpe": float(sharpe),
        })
    # Passing `columns` keeps set_index from raising KeyError when there are
    # no rows at all.
    return pd.DataFrame(rows, columns=columns).set_index("ticker")
def correlation_matrix(price_dfs):
    """Compute the pairwise correlation of the tickers' returns.

    Args:
        price_dfs: dict mapping ticker -> price DataFrame accepted by
            ``compute_returns``.

    Returns:
        Tuple ``(corr, returns)``. ``returns`` has one column per ticker,
        aligned on the return series' index, with rows that are NaN for every
        ticker removed. ``corr`` is ``returns.corr()``, or an empty DataFrame
        when there are no return columns. The result is always a 2-tuple so
        callers can unpack it unconditionally.
    """
    returns = pd.DataFrame(
        {ticker: compute_returns(df) for ticker, df in price_dfs.items()}
    ).dropna(how='all')
    if returns.shape[1] == 0:
        # Previously a bare DataFrame was returned here, which broke callers
        # doing `corr, rets = correlation_matrix(...)`.
        return pd.DataFrame(), returns
    return returns.corr(), returns
def t_test_between(price_dfs, ticker_a, ticker_b, paired=False):
    """Run a t-test comparing the mean returns of two tickers.

    Args:
        price_dfs: dict mapping ticker -> price DataFrame accepted by
            ``compute_returns``.
        ticker_a: Key of the first ticker in ``price_dfs``.
        ticker_b: Key of the second ticker in ``price_dfs``.
        paired: If true use ``scipy.stats.ttest_rel``; otherwise Welch's
            unequal-variance ``ttest_ind``.

    Returns:
        dict with ``t_stat``, ``p_value``, ``n`` (number of aligned
        observations), ``mean_a`` and ``mean_b``.

    Raises:
        ValueError: If either ticker is missing from ``price_dfs`` or fewer
            than three aligned observations remain.
    """
    if ticker_a not in price_dfs or ticker_b not in price_dfs:
        raise ValueError("Both tickers must be present in the loaded data.")

    returns_a = compute_returns(price_dfs[ticker_a])
    returns_b = compute_returns(price_dfs[ticker_b])

    # Align the two series on their index and keep rows where both exist.
    # Fixed internal labels are used instead of the ticker names so that
    # ticker_a == ticker_b does not create duplicate column labels (which
    # made column selection return a DataFrame and float() fail).
    aligned = pd.concat(
        [returns_a.rename("a"), returns_b.rename("b")], axis=1
    ).dropna()
    if aligned.shape[0] < 3:
        raise ValueError("Not enough overlapping return observations to run t-test.")

    a = aligned["a"]
    b = aligned["b"]
    if paired:
        t_stat, p_val = stats.ttest_rel(a, b, nan_policy='omit')
    else:
        t_stat, p_val = stats.ttest_ind(a, b, equal_var=False, nan_policy='omit')
    return {
        "t_stat": float(t_stat),
        "p_value": float(p_val),
        "n": int(aligned.shape[0]),
        "mean_a": float(a.mean()),
        "mean_b": float(b.mean()),
    }
def make_price_plot(price_dfs, show_returns=False):
    """Plot one line per ticker: closing prices, or returns if requested.

    Args:
        price_dfs: dict mapping ticker -> price DataFrame with ``Date`` and
            ``Close`` columns. Empty frames are skipped.
        show_returns: If true plot ``compute_returns(df)`` against its index
            instead of ``Close`` against ``Date``.

    Returns:
        plotly ``Figure`` with one scatter trace per non-empty ticker.
    """
    fig = go.Figure()
    for ticker, df in price_dfs.items():
        if df.empty:
            continue
        # go.Scatter replaces the deprecated go.Line shim; `mode` is left
        # unset so plotly keeps choosing its default.
        if show_returns:
            returns = compute_returns(df)
            trace = go.Scatter(x=returns.index, y=returns.values, name=f"{ticker} returns")
        else:
            trace = go.Scatter(x=df['Date'], y=df['Close'], name=f"{ticker} Close")
        fig.add_trace(trace)
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=20), legend_title=None)
    return fig
def rolling_vol_plot(price_dfs, window=21, periods_per_year=252):
    """Plot annualised rolling volatility of returns, one line per ticker.

    Args:
        price_dfs: dict mapping ticker -> price DataFrame accepted by
            ``compute_returns``.
        window: Number of return observations in each rolling window.
        periods_per_year: Observations per year used to annualise. Defaults
            to 252, the value previously hard-coded.

    Returns:
        plotly ``Figure`` with one scatter trace per ticker.
    """
    fig = go.Figure()
    annualise = np.sqrt(periods_per_year)
    for ticker, df in price_dfs.items():
        returns = compute_returns(df)
        # Population std (ddof=0) over the window, scaled to a yearly figure.
        rolling_vol = returns.rolling(window=window).std(ddof=0) * annualise
        # go.Scatter replaces the deprecated go.Line shim.
        fig.add_trace(go.Scatter(
            x=rolling_vol.index,
            y=rolling_vol.values,
            name=f"{ticker} rolling vol ({window})",
        ))
    fig.update_layout(
        title=f"Rolling Volatility (window={window})",
        margin=dict(l=10, r=10, t=40, b=20),
    )
    return fig
# Main Gradio function
def run_analyzer(tickers, period, interval, rf_pct, analysis_type, tick_a, tick_b, paired, rolling_window):
    """Fetch prices and run the selected analysis for the Gradio UI.

    Args:
        tickers: Comma-separated ticker symbols to download.
        period: Look-back period forwarded to ``fetch_price_data``.
        interval: Bar interval forwarded to ``fetch_price_data``.
        rf_pct: Annual risk-free rate in percent; blank or non-numeric input
            is treated as 0.
        analysis_type: One of ``"Summary"``, ``"Correlation Matrix"``,
            ``"T-test (two tickers)"`` or ``"Volatility & Sharpe"``.
        tick_a: First ticker for the t-test.
        tick_b: Second ticker for the t-test.
        paired: Whether the t-test is paired.
        rolling_window: Window length for the rolling-volatility plot.

    Returns:
        Tuple ``(html, figure)``. On any error ``html`` is a message and
        ``figure`` is ``None``.
    """
    # Local import: the name `html` would otherwise invite shadowing, and
    # only this function needs escaping.
    from html import escape

    try:
        rf_rate = float(rf_pct) / 100.0
    except (TypeError, ValueError):
        # None (cleared field) or unparsable text: fall back to 0%.
        rf_rate = 0.0

    try:
        price_dfs = fetch_price_data(tickers, period, interval)
    except Exception as e:
        # UI boundary: report any failure. The text may echo user input, so
        # escape it before it reaches the HTML output.
        return f"Error fetching data: {escape(str(e))}", None
    if not price_dfs:
        return "No data returned for the provided tickers/period. Check ticker symbols and try again.", None

    if analysis_type == "Summary":
        summary = make_summary_table(price_dfs, rf_rate)
        table_html = summary.round(6).to_html(classes="table table-striped", border=0)
        return table_html, make_price_plot(price_dfs, show_returns=False)

    if analysis_type == "Correlation Matrix":
        outcome = correlation_matrix(price_dfs)
        # Accept either a (corr, returns) tuple or a bare (empty) DataFrame.
        corr = outcome[0] if isinstance(outcome, tuple) else outcome
        if corr.empty:
            return "Not enough return data to compute correlations.", None
        table_html = corr.round(4).to_html(classes="table table-striped", border=0)
        fig = px.imshow(corr.values, x=corr.columns, y=corr.index, color_continuous_scale='RdBu', zmin=-1, zmax=1)
        fig.update_layout(title="Return Correlation Matrix")
        return table_html, fig

    if analysis_type == "T-test (two tickers)":
        if not tick_a or not tick_b:
            return "Provide both tickers for the t-test (Ticker A and Ticker B).", None
        # Normalise once, the same way fetch_price_data normalises its keys.
        sym_a = tick_a.strip().upper()
        sym_b = tick_b.strip().upper()
        try:
            res = t_test_between(price_dfs, sym_a, sym_b, paired=paired)
        except Exception as e:
            return f"T-test error: {escape(str(e))}", None
        # Labels use the normalised, escaped symbols: they are user-typed
        # text going into raw HTML.
        report = "".join([
            "<h4>T-test Result</h4><ul>",
            f"<li>n={res['n']}</li>",
            f"<li>t_stat={res['t_stat']:.6f}</li>",
            f"<li>p_value={res['p_value']:.6f}</li>",
            f"<li>mean_{escape(sym_a)}={res['mean_a']:.6f}</li>",
            f"<li>mean_{escape(sym_b)}={res['mean_b']:.6f}</li>",
            "</ul>",
        ])
        fig = make_price_plot(
            {sym_a: price_dfs[sym_a], sym_b: price_dfs[sym_b]},
            show_returns=False,
        )
        return report, fig

    if analysis_type == "Volatility & Sharpe":
        summary = make_summary_table(price_dfs, rf_rate)
        table_html = summary.round(6).to_html(classes="table table-striped", border=0)
        return table_html, rolling_vol_plot(price_dfs, window=int(rolling_window))

    return "Unknown analysis type.", None
# Gradio UI
# Layout: a single row with an input column (scale 2) and an output column
# (scale 3). The button wires every input into run_analyzer, whose two return
# values fill the HTML and Plot outputs.
# NOTE(review): the `with` nesting was reconstructed from a copy that had lost
# its indentation. Placing `paired`, `rolling_window` and `run_btn` in the
# input column (outside the inner Row) is presumed -- confirm against the
# deployed layout.
with gr.Blocks(title="Financial Stats Analyzer") as demo:
    gr.Markdown("## 📈 Financial Stats Analyzer\nRun quick statistical tests and diagnostics on stock returns. Uses Yahoo Finance (yfinance).")
    with gr.Row():
        with gr.Column(scale=2):
            tickers = gr.Textbox(label="Tickers (comma-separated)", value="AAPL, MSFT, NVDA")
            period = gr.Dropdown(choices=["1mo", "3mo", "6mo", "1y", "2y", "5y"], value="6mo", label="Period")
            interval = gr.Dropdown(choices=["1d", "1wk", "1mo"], value="1d", label="Interval")
            rf_pct = gr.Number(value=0.0, label="Risk-free rate (annual %, e.g., 2.0 for 2%)")
            analysis_type = gr.Radio(choices=["Summary", "Correlation Matrix", "T-test (two tickers)", "Volatility & Sharpe"],
                                     value="Summary", label="Analysis Type")
            with gr.Row():
                tick_a = gr.Textbox(label="Ticker A (for t-test)", placeholder="e.g., AAPL")
                tick_b = gr.Textbox(label="Ticker B (for t-test)", placeholder="e.g., MSFT")
            paired = gr.Checkbox(label="Paired T-test (check if returns are paired)", value=False)
            rolling_window = gr.Slider(minimum=5, maximum=126, value=21, step=1, label="Rolling window (days) for volatility")
            run_btn = gr.Button("Run Analysis")
        with gr.Column(scale=3):
            out_html = gr.HTML()
            out_plot = gr.Plot()
    # The listener is registered inside the Blocks context.
    run_btn.click(fn=run_analyzer,
                  inputs=[tickers, period, interval, rf_pct, analysis_type, tick_a, tick_b, paired, rolling_window],
                  outputs=[out_html, out_plot])


if __name__ == "__main__":
    demo.launch()