# Geo-GenderStudy / app.py
# AliMustapha's picture
# Update app.py
# 2eccbcd verified
__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"
import gradio as gr
from get_gender import GenderPredictor
from GitScraping import CommitInfo
from get_region import RegionPredictor
import pandas as pd
import utils.plot as plot
class GenderPredictorApp:
    """Gradio demo that predicts gender from names and profiles Git repositories.

    The UI has two tabs: one runs the gender model on a single name, the
    other takes a Git URL, labels the repository's authors with a predicted
    gender and sub-region, and shows the resulting plots and table.
    """

    # Class codes produced by the gender predictor mapped to display labels.
    _GENDER_LABELS = {0: "Male", 1: "Female", 2: "Unknown"}

    def __init__(self, modelpath, region_models_directory="saved_model/Regions"):
        """Load both predictors and build the Gradio interface.

        Args:
            modelpath: Path handed to ``GenderPredictor``.
            region_models_directory: Directory handed to ``RegionPredictor``
                as ``models_directory``. Defaults to the path the app has
                always used.
        """
        self.gender_predictor = GenderPredictor(modelpath)
        # Attribute name kept as-is so existing external references still work.
        self.Region_predictor = RegionPredictor(
            models_directory=region_models_directory
        )
        self.setup_ui()

    def setup_ui(self):
        """Create the two-tab Gradio interface and store it on ``self.demo``."""
        # Tab 1: single-name prediction.
        # ``gr.Textbox`` replaces the deprecated ``gr.inputs`` / ``gr.outputs``
        # namespaces and matches the top-level components used for tab 2.
        name_input = gr.Textbox(label="Name")
        gender_output = gr.Textbox(label="Predicted Gender")
        name_tab = gr.Interface(
            fn=self.predict_name,
            inputs=name_input,
            outputs=gender_output,
            title="GitGender: Exploring Global Gender Disparities in Public Code Contributions",
            # Passed through unchanged; no examples are defined on this tab.
            cache_examples=True,
        )

        # Tab 2: repository-level analysis.
        url_input = gr.Textbox(label="Git-url")
        pie_chart_output = gr.Plot(label="Authors by gender")
        histo_chart = gr.Plot(label="Known commits by gender")
        # This slot receives the per-region plot; it previously reused the
        # "by gender" label of the plot above.
        region_commits = gr.Plot(label="Known commits by region")
        data_output = gr.Dataframe(headers=None, label="Contributers Details")
        repo_tab = gr.Interface(
            self.predict_github_url,
            inputs=url_input,
            outputs=[pie_chart_output, histo_chart, region_commits, data_output],
            title="Determining the Geographic Origin of Public Code Contributors",
        )

        self.demo = gr.TabbedInterface(
            [name_tab, repo_tab],
            ["Test Model", "Exploring Diversity in GitHub Repositories"],
        )

    def predict_name(self, name):
        """Return a one-sentence gender prediction for ``name``.

        Args:
            name: The name typed into the UI.

        Returns:
            ``"<name> is Male with probability: <p>%"`` for class 0,
            ``"<name> is Female with probability: <p>%"`` for class 1, and
            ``"<name> is Unknown or not a name"`` for anything else.
        """
        prediction, proba = self.gender_predictor.predict_gender(name)
        if prediction == 0:
            verdict = f"Male with probability: {proba!s}%"
        elif prediction == 1:
            verdict = f"Female with probability: {proba!s}%"
        else:
            verdict = "Unknown or not a name"
        return name + " is " + verdict

    def predict_github_url(self, url):
        """Analyse the repository at ``url`` by author gender and region.

        Args:
            url: Git URL passed to ``CommitInfo``.

        Returns:
            A 4-tuple matching the outputs of the second tab: the gender
            percentage plot, the commits-per-gender plot, the
            commits-per-region plot, and a table of authors with a known
            predicted gender (author, sub-region, gender, first commit date).
        """
        commit_info = CommitInfo(url)
        # ``commits`` is merged on "Author" and grouped by "Year" below;
        # ``authors`` holds one "Author" entry per row to be classified.
        commits, authors = commit_info.get_first_commit_dates()

        # Build both prediction columns from one list instead of creating a
        # Series per row; this also keeps an empty author frame from failing
        # on the two-column assignment.
        predictions = [
            self.gender_predictor.predict_gender(author)
            for author in authors["Author"]
        ]
        authors[["Predicted_Gender", "Confidence"]] = pd.DataFrame(
            predictions,
            columns=["Predicted_Gender", "Confidence"],
            index=authors.index,
        )
        authors["Predicted_Gender"] = authors["Predicted_Gender"].replace(
            self._GENDER_LABELS
        )

        gender_percentage = plot.get_gender_percentage(authors)

        # Work on an explicit copy so the region predictor never operates on
        # a view of ``authors``.
        known_authors = authors[authors["Predicted_Gender"] != "Unknown"].copy()
        known_authors = self.Region_predictor.get_region(known_authors)

        merged_df = commits.merge(
            known_authors[
                ["Author", "sub-region-prediction", "Predicted_Gender", "Confidence"]
            ],
            on=["Author"],
        )

        # Group by Year and Predicted_Gender, then count the occurrences.
        commit_per_gender_counts = (
            merged_df.groupby(["Year", "Predicted_Gender"])
            .size()
            .reset_index(name="Count")
        )
        commits_per_gender = plot.get_commits_per_gender(commit_per_gender_counts)
        commits_per_region = plot.get_commits_per_region(merged_df, url)

        contributors = known_authors[
            ["Author", "sub-region-prediction", "Predicted_Gender", "First_Commit_Date"]
        ]
        return gender_percentage, commits_per_gender, commits_per_region, contributors

    def launch(self):
        """Start the Gradio server for the tabbed demo."""
        self.demo.launch()
if __name__ == "__main__":
    # Script entry point: build the app around the bundled gender model
    # and start serving the demo.
    app = GenderPredictorApp("saved_model/gender_model.tf")
    app.launch()