File size: 3,467 Bytes
d181dd1
 
 
13e5da4
 
 
7249102
13e5da4
0336946
a93aad3
 
13e5da4
 
 
a93aad3
86ee792
13e5da4
 
 
7c42f50
 
de1beba
7c42f50
de1beba
 
7249102
d181dd1
2eccbcd
6623d9f
13e5da4
7c42f50
 
13e5da4
 
 
 
 
 
 
 
 
 
 
 
5f7deee
13e5da4
 
 
 
 
bf620e1
7249102
86ee792
9d378bb
7249102
f919758
66c5935
7249102
c93c82f
a93aad3
a0b0a41
13e5da4
9e52103
13e5da4
7c42f50
13e5da4
49df16f
13e5da4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"

import gradio as gr
from get_gender import GenderPredictor
from GitScraping import CommitInfo
from get_region import RegionPredictor
import pandas as pd
import utils.plot as plot 


class GenderPredictorApp:
    """Gradio front end for name-based gender prediction and repository analysis.

    Two tabs are exposed: one classifies a single name, the other takes a Git
    URL, runs the gender and region predictors over its commit authors, and
    shows summary plots together with a table of contributors.
    """

    # Labels for the numeric classes returned by the gender predictor.
    _GENDER_LABELS = {0: "Male", 1: "Female", 2: "Unknown"}

    def __init__(self, modelpath):
        """Create both predictors and build the Gradio interface.

        Args:
            modelpath: Location of the saved gender model; forwarded to
                ``GenderPredictor``.
        """
        self.gender_predictor = GenderPredictor(modelpath)
        self.Region_predictor = RegionPredictor(models_directory="saved_model/Regions")

        self.setup_ui()

    def setup_ui(self):
        """Assemble the two-tab Gradio UI and store it on ``self.demo``."""
        # Top-level component classes are used for inputs and outputs alike,
        # matching gr.Plot / gr.Dataframe below; the gr.inputs / gr.outputs
        # namespaces are deprecated.
        # NOTE(review): cache_examples is enabled although no examples are
        # passed to this Interface -- confirm it is still wanted.
        name_tab = gr.Interface(
            fn=self.predict_name,
            inputs=gr.Textbox(label="Name"),
            outputs=gr.Textbox(label="Predicted Gender"),
            title="GitGender: Exploring Global Gender Disparities in Public Code Contributions",
            cache_examples=True,
        )

        # The order of these outputs must match the tuple returned by
        # predict_github_url.
        repo_tab = gr.Interface(
            self.predict_github_url,
            inputs=gr.Textbox(label="Git-url"),
            outputs=[
                gr.Plot(label="Authors by gender"),
                gr.Plot(label="Known commits by gender"),
                gr.Plot(label="Known commits by region"),
                gr.Dataframe(headers=None, label="Contributers Details"),
            ],
            title="Determining the Geographic Origin of Public Code Contributors",
        )

        self.demo = gr.TabbedInterface(
            [name_tab, repo_tab],
            ["Test Model", "Exploring Diversity in GitHub Repositories"],
        )

    def predict_name(self, name):
        """Describe the predicted gender of ``name`` as a sentence.

        Args:
            name: The name to classify.

        Returns:
            ``"<name> is Male with probability: <p>%"`` for class 0,
            ``"<name> is Female with probability: <p>%"`` for class 1, and
            ``"<name> is Unknown or not a name"`` for anything else.
        """
        prediction, proba = self.gender_predictor.predict_gender(name)
        if prediction == 0:
            verdict = f"Male with probability: {proba}%"
        elif prediction == 1:
            verdict = f"Female with probability: {proba}%"
        else:
            verdict = "Unknown or not a name"

        return f"{name} is {verdict}"

    def predict_github_url(self, url):
        """Analyse the contributors of the repository at ``url``.

        Args:
            url: Git URL handed to ``CommitInfo``.

        Returns:
            A 4-tuple in the order expected by the second tab's outputs: the
            gender-percentage plot, the commits-per-gender plot, the
            commits-per-region plot, and a DataFrame with the columns
            ``Author``, ``sub-region-prediction``, ``Predicted_Gender`` and
            ``First_Commit_Date`` restricted to authors whose predicted
            gender is not "Unknown".
        """
        commit_info = CommitInfo(url)
        commits, authors = commit_info.get_first_commit_dates()

        # Gather predictions in plain lists instead of building one pd.Series
        # per author; this also keeps the assignment valid when the author
        # table is empty.
        genders = []
        confidences = []
        for author in authors["Author"]:
            gender, confidence = self.gender_predictor.predict_gender(author)
            genders.append(gender)
            confidences.append(confidence)
        authors["Predicted_Gender"] = genders
        authors["Confidence"] = confidences
        authors["Predicted_Gender"] = authors["Predicted_Gender"].replace(self._GENDER_LABELS)

        # The percentage plot is built from every author, "Unknown" included.
        gender_percentage = plot.get_gender_percentage(authors)

        # Region prediction only runs on authors with a known gender. Copy the
        # filtered rows so later changes never touch a view of ``authors``.
        known_authors = authors[authors["Predicted_Gender"] != "Unknown"].copy()
        known_authors = self.Region_predictor.get_region(known_authors)

        merged_df = commits.merge(
            known_authors[["Author", "sub-region-prediction", "Predicted_Gender", "Confidence"]],
            on=["Author"],
        )
        # Group by Year and Predicted_Gender, then count the occurrences.
        commit_per_gender_counts = (
            merged_df.groupby(["Year", "Predicted_Gender"]).size().reset_index(name="Count")
        )
        commits_per_gender = plot.get_commits_per_gender(commit_per_gender_counts)
        commits_per_region = plot.get_commits_per_region(merged_df, url)

        contributors = known_authors[
            ["Author", "sub-region-prediction", "Predicted_Gender", "First_Commit_Date"]
        ]
        return gender_percentage, commits_per_gender, commits_per_region, contributors

    def launch(self):
        """Launch the Gradio interface built by ``setup_ui``."""
        self.demo.launch()


if __name__ == "__main__":
    # Script entry point: build the app around the saved gender model and
    # start serving the Gradio interface.
    GenderPredictorApp("saved_model/gender_model.tf").launch()