new_analyisis / app.py
BSJ2004's picture
Upload 4 files
117edc7 verified
import pandas as pd
import gradio as gr
import plotly.express as px
import warnings
import os
warnings.filterwarnings('ignore')
# --- Data Loading ---
def load_data():
    """Load the dashboard dataset from ``xbrl_data_cleaned.csv``.

    The CSV is expected to sit in the same directory as this script. Only
    the identity columns (Company, Symbol, Industry) and the known metric
    columns that are actually present in the file are read; metric columns
    are renamed to short names and coerced to numbers, with unparseable or
    missing values replaced by 0.

    Returns:
        pandas.DataFrame: the trimmed dataset, or an empty DataFrame when
        anything goes wrong while reading (the error is printed).
    """
    source_file = os.path.join(os.path.dirname(__file__), 'xbrl_data_cleaned.csv')
    print(f"Loading data from: {source_file}")

    # Raw CSV header -> short dashboard name.
    # DCY = current year, DPY = previous year.
    metric_columns = {
        'Revenue From Operations_DCYMain': 'Revenue',
        'Revenue From Operations_DPYMain': 'Revenue_Prev',
        'Net Worth_DCYMain': 'Net Worth',
        'Net Worth_DPYMain': 'Net Worth_Prev',
        'Amount Of Total Investments_DCYMain': 'Investments',
        'Amount Of Total Investments_DPYMain': 'Investments_Prev',
        'Amount Of Total Purchases_DCYMain': 'Purchases',
        'Amount Of Total Purchases_DPYMain': 'Purchases_Prev',
    }
    identity_columns = ('Company', 'Symbol', 'Industry')

    try:
        # Peek at the header only, so we know which metric columns exist.
        available = set(pd.read_csv(source_file, nrows=0).columns)
        renames = {
            raw: short
            for raw, short in metric_columns.items()
            if raw in available
        }

        # A callable `usecols` tolerates identity columns that are absent.
        wanted = set(identity_columns) | set(renames)
        frame = pd.read_csv(source_file, usecols=lambda name: name in wanted)
        frame = frame.rename(columns=renames)

        # Every renamed column is a metric: force it to numeric, blanks -> 0.
        for short in renames.values():
            frame[short] = pd.to_numeric(frame[short], errors='coerce').fillna(0)

        print("Data Loaded Successfully!")
        return frame
    except Exception as exc:
        print(f"Error loading data: {exc}")
        return pd.DataFrame()
# Load the dataset once at import time; the dashboard functions below all
# read this module-level frame.
df = load_data()
# Dropdown choices. Checking for the column (rather than only for an empty
# frame) avoids a KeyError when the CSV loads but has no 'Company' column,
# and missing names are dropped because they can never match a row.
companies = df['Company'].dropna().unique().tolist() if 'Company' in df.columns else []
# --- Dashboard Functions ---
def get_overall_stats():
    """Summarise the module-level ``df`` into the headline KPI values.

    Returns:
        dict: empty when no data is loaded. Otherwise it holds the keys
        "Total Companies", "Total Revenue", "Avg Net Worth" and
        "Top Industry"; a metric whose column is missing falls back to 0
        (or "N/A" for the industry).
    """
    if df.empty:
        return {}

    columns = df.columns

    # mode() yields an empty Series when every Industry value is missing,
    # so only take the first entry when there is one.
    top_industry = "N/A"
    if 'Industry' in columns:
        industry_modes = df['Industry'].mode()
        if not industry_modes.empty:
            top_industry = industry_modes.iloc[0]

    return {
        "Total Companies": len(df['Company'].unique()) if 'Company' in columns else 0,
        "Total Revenue": df['Revenue'].sum() if 'Revenue' in columns else 0,
        "Avg Net Worth": df['Net Worth'].mean() if 'Net Worth' in columns else 0,
        "Top Industry": top_industry,
    }
def plot_industry_dist():
    """Build a pie chart of the ten most frequent industries in ``df``.

    Returns:
        A plotly figure, or None when no data is loaded or the frame has
        no 'Industry' column.
    """
    has_industry = (not df.empty) and 'Industry' in df.columns
    if not has_industry:
        return None

    # value_counts() sorts by frequency, so head(10) is the ten largest.
    top_ten = df['Industry'].value_counts().head(10)
    return px.pie(
        values=top_ten.values,
        names=top_ten.index,
        title='Top 10 Industries',
    )
def plot_revenue_vs_networth():
    """Build a Revenue-vs-Net-Worth scatter plot, one point per row of ``df``.

    Returns:
        A plotly figure, or None when no data is loaded or either the
        'Revenue' or the 'Net Worth' column is absent.
    """
    if df.empty:
        return None
    if not all(needed in df.columns for needed in ('Revenue', 'Net Worth')):
        return None

    return px.scatter(
        df,
        x='Revenue',
        y='Net Worth',
        hover_name='Company',
        title='Revenue vs Net Worth',
    )
def get_company_details(company_name):
    """Return the text summary and charts for one company.

    Args:
        company_name: value selected in the company dropdown; compared for
            equality against the 'Company' column of the module-level ``df``.

    Returns:
        tuple: ``(details_text, current_year_figure, year_over_year_figure)``.
        When no data is loaded the text is "No data loaded"; when no row
        matches (or the frame has no 'Company' column) it is
        "Company not found". In both cases the two figures are None. Each
        figure is also None when there is nothing to plot for it.
    """
    # Local import keeps the block self-contained; numbers.Real also accepts
    # NumPy integer scalars, which are not subclasses of the built-in int.
    import numbers

    if df.empty:
        return "No data loaded", None, None
    # Without a 'Company' column no row can match; answer like any other
    # failed lookup instead of raising KeyError inside the UI callback.
    if 'Company' not in df.columns:
        return "Company not found", None, None

    matches = df[df['Company'] == company_name]
    if matches.empty:
        return "Company not found", None, None

    # Only the first matching row is used.
    metrics = matches.iloc[0].to_dict()
    categories = ['Revenue', 'Net Worth', 'Investments', 'Purchases']

    # Visualization 1: current-year metrics (numeric values only).
    cy_metrics = {
        key: value
        for key, value in metrics.items()
        if key in categories and isinstance(value, numbers.Real)
    }
    if cy_metrics:
        fig_cy = px.bar(
            x=list(cy_metrics.keys()),
            y=list(cy_metrics.values()),
            title=f"Current Year Financials for {company_name}",
        )
    else:
        # Nothing to plot: mirror the year-over-year chart and return None.
        fig_cy = None

    # Visualization 2: current vs previous year, one row per available value.
    yoy_rows = []
    for cat in categories:
        for year_label, column in (('Current', cat), ('Previous', f"{cat}_Prev")):
            if column in metrics:
                yoy_rows.append(
                    {'Metric': cat, 'Year': year_label, 'Value': metrics[column]}
                )
    if yoy_rows:
        fig_yoy = px.bar(
            pd.DataFrame(yoy_rows),
            x='Metric',
            y='Value',
            color='Year',
            barmode='group',
            title=f"Year-over-Year Comparison for {company_name}",
        )
    else:
        fig_yoy = None

    # Plain-text dump of every scalar field, one "name: value" per line.
    metrics_str = "\n".join(
        f"{key}: {value}"
        for key, value in metrics.items()
        if isinstance(value, (numbers.Real, str))
    )
    return metrics_str, fig_cy, fig_yoy
# --- Gradio Interface ---
# Build the two-tab dashboard layout. Everything in the first tab is computed
# once, while the layout is being constructed; the second tab is driven by
# the dropdown's change event.
with gr.Blocks(title="XBRL Financial Dashboard") as demo:
    gr.Markdown("# 📊 XBRL Data Analysis Dashboard")
    gr.Markdown("Explore financial data, industry trends, and company performance.")
    with gr.Tabs():
        # Tab 1: Overall Dashboard
        with gr.TabItem("📈 Market Overview"):
            gr.Markdown("### Key Performance Indicators")
            # An empty dict (no data) falls back to the defaults passed to .get().
            stats = get_overall_stats()
            with gr.Row():
                gr.Number(label="Total Companies", value=stats.get('Total Companies', 0))
                gr.Number(label="Total Revenue", value=stats.get('Total Revenue', 0))
                gr.Number(label="Average Net Worth", value=stats.get('Avg Net Worth', 0))
                gr.Textbox(label="Top Industry", value=stats.get('Top Industry', "N/A"))
            gr.Markdown("### Market Visualizations")
            with gr.Row():
                with gr.Column():
                    plot_ind = plot_industry_dist()
                    # The helpers signal "no chart" with None; test for that
                    # explicitly rather than relying on the figure's truthiness.
                    if plot_ind is not None:
                        gr.Plot(plot_ind)
                    else:
                        gr.Markdown("No Industry Data Available")
                with gr.Column():
                    plot_rev_nw = plot_revenue_vs_networth()
                    if plot_rev_nw is not None:
                        gr.Plot(plot_rev_nw)
                    else:
                        gr.Markdown("Insufficient Data for Scatter Plot")
        # Tab 2: Company Specific Analysis
        with gr.TabItem("🏢 Company Analysis"):
            gr.Markdown("### Deep Dive into Company Financials")
            with gr.Row():
                company_dropdown = gr.Dropdown(choices=companies, label="Select Company", interactive=True)
            with gr.Row():
                with gr.Column(scale=1):
                    company_info = gr.Textbox(label="Company Details", lines=15)
                with gr.Column(scale=2):
                    company_plot_cy = gr.Plot(label="Current Year Metrics")
                    company_plot_yoy = gr.Plot(label="Year-over-Year Comparison")
            # Selecting a company refreshes the text box and both charts.
            company_dropdown.change(
                fn=get_company_details,
                inputs=company_dropdown,
                outputs=[company_info, company_plot_cy, company_plot_yoy]
            )
if __name__ == "__main__":
    # Script entry point: start the Gradio server on all interfaces and
    # request a public share link.
    print("Launching Gradio App...")
    launch_options = {"server_name": "0.0.0.0", "share": True}
    demo.launch(**launch_options)