Spaces:
Runtime error
Runtime error
Commit ·
7249102
1
Parent(s): ca893a9
add region plot
Browse files
- app.py +10 -10
- get_region.py +4 -3
app.py
CHANGED
|
@@ -4,6 +4,7 @@ __license__ = "GPL-3.0-or-later"
|
|
| 4 |
import gradio as gr
|
| 5 |
from get_gender import GenderPredictor
|
| 6 |
from GitScraping import CommitInfo
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
import utils.plot as plot
|
| 9 |
class GenderPredictorApp:
|
|
@@ -20,9 +21,10 @@ class GenderPredictorApp:
|
|
| 20 |
name = gr.inputs.Textbox(label="Git-url")
|
| 21 |
pie_chart_output = gr.Plot(label="Authors by gender")
|
| 22 |
histo_chart = gr.Plot(label="Known commits by gender")
|
|
|
|
| 23 |
data_output =gr.Dataframe(headers=None,label="Contributers Details")
|
| 24 |
# name_buttom = gr.Button("Predict")
|
| 25 |
-
interface2_fn = gr.Interface(self.predict_github_url, inputs=name, outputs=[pie_chart_output, histo_chart,data_output], title="GitGender: Exploring Global Gender Disparities in Public Code Contributions",cache_examples=True )
|
| 26 |
demo = gr.TabbedInterface([interface1_fn, interface2_fn], ["Test Model", "Exploring Diversity in GitHub Repositories"])
|
| 27 |
self.demo = demo
|
| 28 |
|
|
@@ -42,24 +44,22 @@ class GenderPredictorApp:
|
|
| 42 |
return prediction
|
| 43 |
def predict_github_url(self, url):
|
| 44 |
commit_info = CommitInfo(url)
|
| 45 |
-
|
| 46 |
df,first_commit_dates = commit_info.get_first_commit_dates()
|
| 47 |
first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(self.gender_predictor.predict_gender(name)))
|
| 48 |
first_commit_dates['Predicted_Gender'] = first_commit_dates['Predicted_Gender'].replace({0: "Male", 1: "Female", 2: "Unknown"})
|
| 49 |
-
print(first_commit_dates)
|
| 50 |
Gender_Percentage=plot.get_gender_percentage(first_commit_dates)
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
|
| 53 |
-
# ******************************
|
| 54 |
-
merged_df = df.merge(first_commit_dates[["Author","Predicted_Gender","Confidence"]], on=["Author"])
|
| 55 |
# Group by Year and Predicted_Gender, then count the occurrences
|
| 56 |
commit_per_gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
fig=plot.get_commits_per_gender(commit_per_gender_counts)
|
| 61 |
# Convert the chart to HTML and return it
|
| 62 |
-
return Gender_Percentage,
|
| 63 |
def launch(self):
|
| 64 |
self.demo.launch()
|
| 65 |
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
from get_gender import GenderPredictor
|
| 6 |
from GitScraping import CommitInfo
|
| 7 |
+
from get_region import RegionPredictor
|
| 8 |
import pandas as pd
|
| 9 |
import utils.plot as plot
|
| 10 |
class GenderPredictorApp:
|
|
|
|
| 21 |
name = gr.inputs.Textbox(label="Git-url")
|
| 22 |
pie_chart_output = gr.Plot(label="Authors by gender")
|
| 23 |
histo_chart = gr.Plot(label="Known commits by gender")
|
| 24 |
+
region_commits = gr.Plot(label="Known commits by gender")
|
| 25 |
data_output =gr.Dataframe(headers=None,label="Contributers Details")
|
| 26 |
# name_buttom = gr.Button("Predict")
|
| 27 |
+
interface2_fn = gr.Interface(self.predict_github_url, inputs=name, outputs=[pie_chart_output, histo_chart,region_commits,data_output], title="GitGender: Exploring Global Gender Disparities in Public Code Contributions",cache_examples=True )
|
| 28 |
demo = gr.TabbedInterface([interface1_fn, interface2_fn], ["Test Model", "Exploring Diversity in GitHub Repositories"])
|
| 29 |
self.demo = demo
|
| 30 |
|
|
|
|
| 44 |
return prediction
|
| 45 |
def predict_github_url(self, url):
    """Analyse the contributors of a Git repository and build the dashboard outputs.

    Args:
        url: Repository URL handed to ``CommitInfo``.

    Returns:
        A 4-tuple ordered like the ``outputs`` list of the Gradio interface
        that wraps this method: (authors-by-gender chart, commits-per-gender
        chart, commits-per-region chart, contributors table).
    """
    commit_info = CommitInfo(url)
    region_predictor = RegionPredictor("saved_model/Regions")
    df, first_commit_dates = commit_info.get_first_commit_dates()

    # predict_gender yields two values per author; spread them over two columns.
    first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(
        lambda author: pd.Series(self.gender_predictor.predict_gender(author))
    )
    # Map the numeric class ids to readable labels.
    first_commit_dates['Predicted_Gender'] = first_commit_dates['Predicted_Gender'].replace(
        {0: "Male", 1: "Female", 2: "Unknown"}
    )
    gender_percentage = plot.get_gender_percentage(first_commit_dates)

    # Only authors with a known gender go through region prediction.
    # get_region assigns new columns on the frame it receives, so hand it an
    # explicit copy rather than a filtered slice of first_commit_dates.
    known_mask = first_commit_dates["Predicted_Gender"] != "Unknown"
    results = first_commit_dates[known_mask].copy()
    results = region_predictor.get_region(results)

    # Attach the per-author predictions to every commit row.
    merged_df = df.merge(
        results[["Author", "sub-region-prediction", "Predicted_Gender", "Confidence"]],
        on=["Author"],
    )
    # Group by Year and Predicted_Gender, then count the occurrences
    commit_per_gender_counts = (
        merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
    )
    commits_per_gender = plot.get_commits_per_gender(commit_per_gender_counts)
    # NOTE(review): the region chart is built with get_commits_per_gender as well,
    # but with different arguments (merged_df, url) — confirm this is the intended
    # plotting helper and signature in utils.plot.
    commits_per_region = plot.get_commits_per_gender(merged_df, url)

    contributors = results[["Author", "First_Commit_Date", "sub-region-prediction", "Predicted_Gender"]]
    # Order must match outputs=[pie_chart_output, histo_chart, region_commits, data_output]:
    # the region figure goes before the contributors table.
    return gender_percentage, commits_per_gender, commits_per_region, contributors
|
| 63 |
def launch(self):
    """Start the Gradio app stored on ``self.demo``."""
    app = self.demo
    app.launch()
|
| 65 |
|
get_region.py
CHANGED
|
@@ -27,8 +27,8 @@ class RegionPredictor:
|
|
| 27 |
|
| 28 |
|
| 29 |
def model_prediction(self, dataset,model,label_encoder,optF1=None,optROC=None):
|
| 30 |
-
input_Full_name=np.asarray(dataset['
|
| 31 |
-
input_offset=np.asarray(dataset['
|
| 32 |
predictions_proba = model.predict({
|
| 33 |
"input_text": input_Full_name,
|
| 34 |
"input_offset": input_offset
|
|
@@ -51,6 +51,7 @@ class RegionPredictor:
|
|
| 51 |
return y_pred_F1,y_pred_ROC
|
| 52 |
|
| 53 |
def get_region(self,dataset):
|
|
|
|
| 54 |
model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/region/files/")
|
| 55 |
y_pred,_=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
|
| 56 |
dataset["region-prediction"]=y_pred
|
|
@@ -67,7 +68,7 @@ class RegionPredictor:
|
|
| 67 |
y_pred,_=self.model_prediction(Asia,model,label_encoder,optF1,optROC)
|
| 68 |
Asia["sub-region-prediction"]=y_pred
|
| 69 |
model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/Americas/files/")
|
| 70 |
-
y_pred=self.model_prediction(Americas,model,label_encoder,optF1,optROC)
|
| 71 |
Americas["sub-region-prediction"]=y_pred
|
| 72 |
Oceania["sub-region-prediction"]="Australia and New Zealand"
|
| 73 |
Africa["sub-region-prediction"]="Africa"
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def model_prediction(self, dataset,model,label_encoder,optF1=None,optROC=None):
|
| 30 |
+
input_Full_name=np.asarray(dataset['Author']).astype('str')
|
| 31 |
+
input_offset=np.asarray(dataset['Author_Timezone']).astype('float')
|
| 32 |
predictions_proba = model.predict({
|
| 33 |
"input_text": input_Full_name,
|
| 34 |
"input_offset": input_offset
|
|
|
|
| 51 |
return y_pred_F1,y_pred_ROC
|
| 52 |
|
| 53 |
def get_region(self,dataset):
|
| 54 |
+
dataset["Author_Timezone"]= dataset["Author_Timezone"] /60
|
| 55 |
model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/region/files/")
|
| 56 |
y_pred,_=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
|
| 57 |
dataset["region-prediction"]=y_pred
|
|
|
|
| 68 |
y_pred,_=self.model_prediction(Asia,model,label_encoder,optF1,optROC)
|
| 69 |
Asia["sub-region-prediction"]=y_pred
|
| 70 |
model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/Americas/files/")
|
| 71 |
+
y_pred,_=self.model_prediction(Americas,model,label_encoder,optF1,optROC)
|
| 72 |
Americas["sub-region-prediction"]=y_pred
|
| 73 |
Oceania["sub-region-prediction"]="Australia and New Zealand"
|
| 74 |
Africa["sub-region-prediction"]="Africa"
|