File size: 4,348 Bytes
d181dd1
 
 
58b074e
 
896acc2
58b074e
896acc2
 
bf620e1
 
 
 
 
 
5a192b9
 
bf620e1
 
 
896acc2
bf620e1
896acc2
bf620e1
2d7fbe5
bf620e1
896acc2
 
bf620e1
896acc2
bf620e1
4ceac99
bf620e1
896acc2
 
bf620e1
896acc2
c2c32e0
 
896acc2
6dcb7f9
c2c32e0
 
 
 
 
 
 
 
58b074e
 
 
 
 
 
 
 
ea004be
 
 
 
 
 
 
 
58b074e
ea004be
58b074e
 
 
 
 
 
 
 
 
 
 
 
 
ca893a9
58b074e
 
 
 
 
 
 
 
 
 
 
 
 
3a76d91
 
 
58b074e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"

import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def get_commits_per_gender(gender_counts):
    """Build a bar chart of yearly commit counts for the Male and Female groups.

    Rows whose ``Predicted_Gender`` is ``"Unknown"`` are discarded first. For
    each year, every gender's share of the remaining commits is expressed as
    a percentage and attached to its bars as hover ``customdata``.

    Args:
        gender_counts: DataFrame read through its ``Year``,
            ``Predicted_Gender`` and ``Count`` columns.

    Returns:
        A Plotly figure holding one ``go.Bar`` trace named "Male" followed
        by one named "Female".
    """
    # (gender label, percentage column, hover template), in plotting order.
    trace_specs = (
        (
            "Male",
            "Male Percentage",
            'Male Commits: %{y:.2f}<br> Male Percentage: %{customdata:.2f}',
        ),
        (
            "Female",
            "Female Percentage",
            'Female Commits: %{y:.2f}<br>Female Percentage: %{customdata:.2f}',
        ),
    )

    known = gender_counts[gender_counts["Predicted_Gender"] != "Unknown"]

    # Commits per year summed over the known genders; the result is indexed by Year.
    yearly = known.groupby('Year').agg({'Count': 'sum'})
    for gender, pct_column, _ in trace_specs:
        gender_total = known[known['Predicted_Gender'] == gender].groupby('Year')['Count'].sum()
        yearly[pct_column] = gender_total / yearly['Count'] * 100
    # Years missing from a gender's series come out as NaN after index
    # alignment; those are reported as 0.
    yearly = yearly.fillna(0)

    # Join the yearly percentages back onto the per-gender rows. Both frames
    # carry a "Count" column, so the per-gender one arrives as "Count_y".
    joined = yearly.merge(known, on=["Year"])
    per_gender = joined[['Year', 'Male Percentage', 'Female Percentage',
                         'Predicted_Gender', 'Count_y']]
    per_gender = per_gender.rename(columns={"Count_y": "Count"})

    fig = make_subplots(rows=1, cols=1, shared_xaxes=True)
    for gender, pct_column, hover in trace_specs:
        rows = per_gender[per_gender["Predicted_Gender"] == gender]
        bar = go.Bar(
            x=rows['Year'],
            y=rows['Count'],
            name=gender,
            hovertemplate=hover,
            customdata=rows[pct_column],  # referenced as %{customdata} in the template
        )
        fig.add_trace(bar, row=1, col=1)

    return fig


def get_gender_percentage(df):
    """Return a pie chart of how many rows carry each predicted gender.

    Args:
        df: DataFrame with a ``Predicted_Gender`` column.

    Returns:
        A ``go.Figure`` with a single pie trace. Its slices are always
        "Male", "Female" and "Unknown", in that order; a label that does not
        occur in ``df`` gets the value 0.
    """
    # Slice label -> slice colour; insertion order fixes the slice order.
    slice_colors = {"Male": "blue", "Female": "pink", "Unknown": "gray"}

    tally = df['Predicted_Gender'].value_counts().to_dict()
    slice_labels = list(slice_colors)
    slice_values = [tally.get(name, 0) for name in slice_labels]

    pie = go.Pie(
        labels=slice_labels,
        values=slice_values,
        marker=dict(colors=list(slice_colors.values())),
    )
    return go.Figure(data=[pie])

def get_commits_per_region(df, url):
    """Build an animated choropleth of commit counts per sub-region.

    Rows of ``df`` are counted per (Year, sub-region) pair, an extra
    "Overall" frame with the per-sub-region totals is appended, and the
    counts are joined with the lookup table in ``utils/CodeToRegion.csv`` so
    they can be drawn on a world map.

    Args:
        df: DataFrame read through its ``Year`` and
            ``sub-region-prediction`` columns.
        url: Identifier appended to the figure title; converted with
            ``str()``, so non-string values are accepted.

    Returns:
        A ``go.Figure`` with one animation frame per distinct ``Year`` value
        plus the "Overall" frame.

    Note:
        The CSV path is relative to the current working directory. The file
        is expected to provide a ``sub-region`` column (the join key) and a
        ``code3`` column (used as map locations) -- presumably one row per
        country; verify against the data file.
    """
    country_to_region = pd.read_csv("utils/CodeToRegion.csv")
    country_to_region = country_to_region.rename(columns={"sub-region": "sub-region-prediction"})

    # Number of rows for every (Year, sub-region) pair.
    sub_region_counts = df.groupby(['Year', "sub-region-prediction"])["sub-region-prediction"].count().reset_index(name='Count')

    # Totals over all years, tagged with the pseudo-year 'Overall' so they
    # show up as one more animation frame.
    overall = sub_region_counts.groupby('sub-region-prediction')['Count'].sum().reset_index()
    overall['Year'] = 'Overall'
    counts_with_overall = pd.concat([sub_region_counts, overall], ignore_index=True, sort=False)

    # Left join: every count row is kept and picks up the lookup columns
    # (including 'code3') of each lookup row in the same sub-region.
    merged_df = counts_with_overall.merge(country_to_region, on="sub-region-prediction", how="left")

    fig = go.Figure(data=px.choropleth(
        merged_df,
        locations="code3",
        color="Count",
        hover_name="sub-region-prediction",
        color_continuous_scale="Greens",
        animation_frame="Year",
        title="Counts by Sub-Region"  # replaced by title_text in update_layout below
    ))

    # Written as adjacent literals rather than a backslash continuation
    # inside the string, which would embed the source indentation in the
    # displayed link text.
    source_note = (
        'Source: <a href="https://huggingface.co/spaces/AliMustapha/Geo-GenderStudy">'
        'Geo-GenderStudy</a>'
    )

    fig.update_layout(
        title_text='Commits Counts by Sub-Region for ' + str(url),
        geo=dict(
            showframe=False,
            showcoastlines=False,
            projection_type='equirectangular'
        ),
        annotations=[dict(
            x=0.55,
            y=0.1,
            xref='paper',
            yref='paper',
            text=source_note,
            showarrow=False
        )],
        height=600,
        width=1000
    )
    return fig