Subhajit Chakraborty
modify(1)
eb93708
import json
import pandas as pd
import re
import plotly.express as px
# Dollar amount with optional thousands separators / decimals and an optional
# magnitude word. The input is lower-cased before matching, so only lower-case
# magnitude words are listed here.
_REVENUE_RE = re.compile(
    r"\$\s*(\d[\d,]*(?:\.\d+)?|\.\d+)\s*(thousand|million|billion|trillion)?"
)

# Multiplier applied for each recognised magnitude word.
_REVENUE_SCALE = {
    "thousand": 1e3,
    "million": 1e6,
    "billion": 1e9,
    "trillion": 1e12,
}


def parse_revenue(rev_str):
    """Convert a free-text revenue string into a number of US dollars.

    The first ``$``-prefixed amount found in the text is used, e.g.
    ``"$2.5 billion"`` -> ``2.5e9`` and ``"$1,500 million"`` -> ``1.5e9``.
    Matching is case-insensitive.

    Args:
        rev_str: Revenue description. Anything that is not a non-empty
            string is treated as "no data".

    Returns:
        The amount as a float, scaled by the magnitude word when one
        follows the number, or ``None`` when the input is not a non-empty
        string or contains no ``$`` amount.
    """
    if not rev_str or not isinstance(rev_str, str):
        return None

    match = _REVENUE_RE.search(rev_str.lower())
    if match is None:
        return None

    # Thousands separators are presentation only; float() cannot parse them.
    amount = float(match.group(1).replace(",", ""))
    # group(2) is None when no magnitude word follows the number.
    return amount * _REVENUE_SCALE.get(match.group(2), 1.0)
def df_creator_from_json_and_process(filepath: str) -> pd.DataFrame:
    """Load the ``"companies"`` list of a JSON file into a DataFrame.

    Every company record is guaranteed a ``"score"`` key (``None`` when the
    file does not provide one), so the resulting frame always has a
    ``score`` column when at least one record exists.

    Args:
        filepath: Path to a JSON document whose top-level object has a
            ``"companies"`` key holding a list of dict records.

    Returns:
        A DataFrame with one row per company record.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"companies"`` key.
    """
    # JSON text is UTF-8; be explicit instead of relying on the platform's
    # locale encoding, which would garble non-ASCII names on some systems.
    with open(filepath, "r", encoding="utf-8") as f:
        companies = json.load(f)["companies"]

    for company in companies:
        company.setdefault("score", None)

    return pd.DataFrame(companies)
def _bottom_legend(y):
    """Return a horizontal, centred legend spec whose top edge sits at ``y``."""
    return dict(orientation="h", yanchor="top", y=y, xanchor="center", x=0.5)


def _count_table(df, column, label):
    """Return a (``label``, ``"Count"``) frame of value frequencies in ``df[column]``."""
    counts = df[column].value_counts().reset_index()
    # Overwrite both column names: what reset_index() calls them is not relied on.
    counts.columns = [label, "Count"]
    return counts


def _count_pie(df, column, label, title, *, legend_y, height, width=None):
    """Build a pie chart of how often each value of ``df[column]`` occurs.

    ``width`` is only set on the layout when given, leaving the figure's
    width at plotly's default otherwise.
    """
    fig = px.pie(
        _count_table(df, column, label),
        names=label,
        values="Count",
        title=title,
    )
    layout = {"legend": _bottom_legend(legend_y)}
    if width is not None:
        layout["width"] = width
    layout.update(
        height=height,
        margin=dict(l=0, r=0, b=0, t=20),
        uniformtext_minsize=10,
        uniformtext_mode="hide",
    )
    fig.update_layout(**layout)
    return fig


def create_chart(filepath: str):
    """Build the four overview charts for the companies in a JSON file.

    The data is loaded with ``df_creator_from_json_and_process``; the frame
    must provide the columns ``key_industry``, ``country``,
    ``business_type``, ``company_name`` and ``approx_revenue``.

    Args:
        filepath: Path to the companies JSON file.

    Returns:
        A tuple ``(fig_industry, fig_country, fig_btype, fig_rev)``: three
        pie charts of company counts per industry, country and business
        type, and a bar chart of revenue per company, sorted by parsed
        revenue in descending order and coloured by industry.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    df = df_creator_from_json_and_process(filepath)

    fig_industry = _count_pie(
        df,
        "key_industry",
        "Industry",
        "Distribution of Companies by Industry",
        legend_y=-0.2,
        height=480,
        width=400,
    )
    fig_country = _count_pie(
        df,
        "country",
        "Country",
        "Distribution of Companies by Country",
        legend_y=-0.1,
        height=300,
    )
    fig_btype = _count_pie(
        df,
        "business_type",
        "Business Type",
        "Distribution of Companies by Business types",
        legend_y=-0.1,
        height=300,
        width=400,
    )

    # Numeric revenue drives bar height and ordering; the original text is
    # kept as the bar label so the source wording stays visible.
    df["approx_revenue_usd"] = df["approx_revenue"].apply(parse_revenue)
    fig_rev = px.bar(
        df.sort_values(by="approx_revenue_usd", ascending=False),
        x="company_name",
        y="approx_revenue_usd",
        color="key_industry",
        title="Company Revenue Comparison",
        text="approx_revenue",
    )
    fig_rev.update_layout(
        xaxis_title="Company",
        yaxis_title="Revenue (USD)",
        yaxis_tickformat="$,.0f",
        uniformtext_minsize=10,
        uniformtext_mode="hide",
        legend=_bottom_legend(-0.8),
    )

    return fig_industry, fig_country, fig_btype, fig_rev