Spaces:
Running
Running
| import json | |
| import pandas as pd | |
| import re | |
| import plotly.express as px | |
def parse_revenue(rev_str):
    """Convert a human-readable revenue string into a US-dollar amount.

    The text is lowercased and searched for the first ``$<number>``,
    optionally followed by a scale word (``thousand``, ``million``,
    ``billion`` or ``trillion``). Thousands separators in the number
    (``$1,500``) are accepted.

    Args:
        rev_str: Text such as ``"$5 million"`` or ``"$1,250.5 billion"``.

    Returns:
        The amount as a ``float``, or ``None`` when ``rev_str`` is empty,
        is not a string, or contains no recognisable dollar amount.
    """
    if not rev_str or not isinstance(rev_str, str):
        return None

    # The number must be well-formed (digits, optional ",ddd" groups, optional
    # fraction) so that float() below can never raise on input like "$." or
    # "$1.2.3", which the looser character class "[\d.]+" used to let through.
    match = re.search(
        r"\$(\d+(?:,\d{3})*(?:\.\d+)?|\.\d+)"
        r"\s*(thousand|million|billion|trillion)?",
        rev_str.lower(),
    )
    if match is None:
        return None

    amount = float(match.group(1).replace(",", ""))
    multipliers = {
        "thousand": 1e3,
        "million": 1e6,
        "billion": 1e9,
        "trillion": 1e12,
    }
    # No scale word -> group(2) is None -> the amount is returned unscaled.
    return amount * multipliers.get(match.group(2), 1)
def df_creator_from_json_and_process(filepath: str):
    """Load the company records from a JSON file into a DataFrame.

    The file must contain a top-level ``"companies"`` key holding a list of
    record dicts. Any record without a ``"score"`` key gets ``score = None``
    before the DataFrame is built.

    Args:
        filepath: Path to the JSON file.

    Returns:
        A ``pandas.DataFrame`` built from the list of company records.

    Raises:
        KeyError: If the JSON document has no ``"companies"`` key.
    """
    # JSON is UTF-8 by specification; be explicit so that non-ASCII company
    # names do not depend on the platform's default locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        records = json.load(f)["companies"]

    for record in records:
        record.setdefault("score", None)

    return pd.DataFrame(records)
def create_chart(filepath: str):
    """Build the four summary figures for the companies file at ``filepath``.

    Produces three pie charts (company counts per industry, per country and
    per business type) and one bar chart comparing parsed company revenue,
    coloured by industry and labelled with the original revenue text.

    Args:
        filepath: Path handed to ``df_creator_from_json_and_process``.

    Returns:
        A tuple ``(fig_industry, fig_country, fig_btype, fig_rev)``.
    """
    companies = df_creator_from_json_and_process(filepath)

    def tally(column, label):
        # Frequency table for one column, with display-friendly headers.
        table = companies[column].value_counts().reset_index()
        table.columns = [label, "Count"]
        return table

    def legend_below(y_offset):
        # Horizontal legend centred underneath the plot area.
        return {
            "orientation": "h",
            "yanchor": "top",
            "y": y_offset,
            "xanchor": "center",
            "x": 0.5,
        }

    def style_pie(figure, legend_y, **size):
        # Layout shared by all pies; only legend offset and size differ.
        figure.update_layout(
            legend=legend_below(legend_y),
            margin={"l": 0, "r": 0, "b": 0, "t": 20},
            uniformtext_minsize=10,
            uniformtext_mode='hide',
            **size,
        )

    by_industry = tally("key_industry", "Industry")
    by_country = tally("country", "Country")
    by_btype = tally("business_type", "Business Type")

    # Numeric revenue column used for ordering and bar heights.
    companies["approx_revenue_usd"] = companies["approx_revenue"].apply(
        parse_revenue
    )

    fig_industry = px.pie(
        by_industry,
        names="Industry",
        values="Count",
        title="Distribution of Companies by Industry",
    )
    fig_country = px.pie(
        by_country,
        names="Country",
        values="Count",
        title="Distribution of Companies by Country",
    )
    fig_btype = px.pie(
        by_btype,
        names="Business Type",
        values="Count",
        title="Distribution of Companies by Business types",
    )

    ranked = companies.sort_values(by="approx_revenue_usd", ascending=False)
    fig_rev = px.bar(
        ranked,
        x="company_name",
        y="approx_revenue_usd",
        color="key_industry",
        title="Company Revenue Comparison",
        text="approx_revenue",
    )

    style_pie(fig_industry, -0.2, width=400, height=480)
    # The country pie sets no explicit width.
    style_pie(fig_country, -0.1, height=300)
    style_pie(fig_btype, -0.1, width=400, height=300)

    fig_rev.update_layout(
        xaxis_title="Company",
        yaxis_title="Revenue (USD)",
        yaxis_tickformat="$,.0f",
        uniformtext_minsize=10,
        uniformtext_mode='hide',
        legend=legend_below(-0.8),
    )

    return fig_industry, fig_country, fig_btype, fig_rev