File size: 3,461 Bytes
723bbe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb93708
 
 
723bbe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import json
import pandas as pd
import re
import plotly.express as px


# A dollar amount (thousands separators and decimals allowed), optionally
# followed by a magnitude word, e.g. "$1,250.5 million".
_REVENUE_PATTERN = re.compile(
    r"\$\s*(\d[\d,]*(?:\.\d+)?|\.\d+)\s*(thousand|million|billion|trillion)?",
    re.IGNORECASE,
)

# Multiplier applied for each recognised magnitude word.
_REVENUE_MULTIPLIERS = {
    "thousand": 1e3,
    "million": 1e6,
    "billion": 1e9,
    "trillion": 1e12,
}


def parse_revenue(rev_str):
    """Convert a free-text revenue string into a number of US dollars.

    The first ``$`` amount found in the text is used. Thousands separators
    are accepted (``"$1,200 million"``) and a trailing magnitude word
    (thousand / million / billion / trillion, case-insensitive) scales the
    amount.

    Args:
        rev_str: Text such as ``"$1.5 billion"``. Any non-string or empty
            value is treated as "no revenue available".

    Returns:
        The amount as a ``float``, or ``None`` when the input is not a
        non-empty string or contains no recognisable dollar amount.

    >>> parse_revenue("$1.5 billion")
    1500000000.0
    >>> parse_revenue("unknown") is None
    True
    """
    if not rev_str or not isinstance(rev_str, str):
        return None

    match = _REVENUE_PATTERN.search(rev_str)
    if match is None:
        return None

    # The pattern only admits digits, commas and at most one decimal point,
    # so after dropping the separators float() cannot fail.
    amount = float(match.group(1).replace(",", ""))
    scale = match.group(2)
    if scale:
        amount *= _REVENUE_MULTIPLIERS[scale.lower()]
    return amount

def df_creator_from_json_and_process(filepath: str) -> pd.DataFrame:
    """Load the ``"companies"`` list of a JSON file into a DataFrame.

    Every company record is given a ``"score"`` key (``None`` when the file
    did not provide one) before the frame is built.

    Args:
        filepath: Path to a JSON document whose top-level object has a
            ``"companies"`` key holding a list of record dicts.

    Returns:
        A DataFrame with one row per company record.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"companies"`` key.
    """
    # Read as bytes so json.load detects the encoding (UTF-8/16/32, with or
    # without a BOM) itself rather than depending on the platform's locale
    # default, which breaks non-ASCII company names on some systems.
    with open(filepath, "rb") as f:
        data = json.load(f)["companies"]

    for company in data:
        company.setdefault("score", None)
    return pd.DataFrame(data)

def _value_counts_frame(df, column, label):
    """Return a (``label``, "Count") frame of how often each ``column`` value occurs."""
    counts = df[column].value_counts().reset_index()
    # Name the two columns positionally so the result does not depend on
    # what reset_index() happened to call them.
    counts.columns = [label, "Count"]
    return counts


def _style_pie(fig, legend_y, height, width=None):
    """Apply the shared pie layout: centred horizontal legend below the plot."""
    layout = dict(
        legend=dict(
            orientation="h",
            yanchor="top",
            y=legend_y,
            xanchor="center",
            x=0.5,
        ),
        height=height,
        margin=dict(l=0, r=0, b=0, t=20),
        uniformtext_minsize=10,
        uniformtext_mode="hide",
    )
    # Only set a width when one was requested, so the figure otherwise keeps
    # plotly's default sizing.
    if width is not None:
        layout["width"] = width
    fig.update_layout(**layout)


def create_chart(filepath: str):
    """Build the company overview figures from a companies JSON file.

    Args:
        filepath: Path passed to ``df_creator_from_json_and_process``. The
            resulting frame must provide the ``key_industry``, ``country``,
            ``business_type``, ``approx_revenue`` and ``company_name``
            columns.

    Returns:
        A 4-tuple ``(fig_industry, fig_country, fig_btype, fig_rev)``: three
        pie charts of company counts per industry, country and business
        type, and a bar chart of parsed revenue per company.
    """
    df = df_creator_from_json_and_process(filepath)
    df["approx_revenue_usd"] = df["approx_revenue"].apply(parse_revenue)

    fig_industry = px.pie(
        _value_counts_frame(df, "key_industry", "Industry"),
        names="Industry",
        values="Count",
        title="Distribution of Companies by Industry",
    )
    fig_country = px.pie(
        _value_counts_frame(df, "country", "Country"),
        names="Country",
        values="Count",
        title="Distribution of Companies by Country",
    )
    fig_btype = px.pie(
        _value_counts_frame(df, "business_type", "Business Type"),
        names="Business Type",
        values="Count",
        title="Distribution of Companies by Business types",
    )

    by_revenue = df.sort_values(by="approx_revenue_usd", ascending=False)
    fig_rev = px.bar(
        by_revenue,
        x="company_name",
        y="approx_revenue_usd",
        color="key_industry",
        title="Company Revenue Comparison",
        text="approx_revenue",
    )
    # color= splits the bars into one trace per industry, and a categorical
    # axis otherwise follows trace order, which would discard the revenue
    # sort. Pin the x-axis to the sorted company order explicitly.
    fig_rev.update_xaxes(
        categoryorder="array",
        categoryarray=by_revenue["company_name"].drop_duplicates().tolist(),
    )

    _style_pie(fig_industry, legend_y=-0.2, height=480, width=400)
    _style_pie(fig_country, legend_y=-0.1, height=300)
    _style_pie(fig_btype, legend_y=-0.1, height=300, width=400)

    fig_rev.update_layout(
        xaxis_title="Company",
        yaxis_title="Revenue (USD)",
        yaxis_tickformat="$,.0f",
        uniformtext_minsize=10,
        uniformtext_mode="hide",
        legend=dict(
            orientation="h",
            yanchor="top",
            xanchor="center",
            x=0.5,
            y=-0.8,
        ),
    )

    return fig_industry, fig_country, fig_btype, fig_rev