AliMustapha committed on
Commit
7249102
·
1 Parent(s): ca893a9

add region plot

Browse files
Files changed (2) hide show
  1. app.py +10 -10
  2. get_region.py +4 -3
app.py CHANGED
@@ -4,6 +4,7 @@ __license__ = "GPL-3.0-or-later"
4
  import gradio as gr
5
  from get_gender import GenderPredictor
6
  from GitScraping import CommitInfo
 
7
  import pandas as pd
8
  import utils.plot as plot
9
  class GenderPredictorApp:
@@ -20,9 +21,10 @@ class GenderPredictorApp:
20
  name = gr.inputs.Textbox(label="Git-url")
21
  pie_chart_output = gr.Plot(label="Authors by gender")
22
  histo_chart = gr.Plot(label="Known commits by gender")
 
23
  data_output =gr.Dataframe(headers=None,label="Contributers Details")
24
  # name_buttom = gr.Button("Predict")
25
- interface2_fn = gr.Interface(self.predict_github_url, inputs=name, outputs=[pie_chart_output, histo_chart,data_output], title="GitGender: Exploring Global Gender Disparities in Public Code Contributions",cache_examples=True )
26
  demo = gr.TabbedInterface([interface1_fn, interface2_fn], ["Test Model", "Exploring Diversity in GitHub Repositories"])
27
  self.demo = demo
28
 
@@ -42,24 +44,22 @@ class GenderPredictorApp:
42
  return prediction
43
  def predict_github_url(self, url):
44
  commit_info = CommitInfo(url)
45
- print(url)
46
  df,first_commit_dates = commit_info.get_first_commit_dates()
47
  first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(self.gender_predictor.predict_gender(name)))
48
  first_commit_dates['Predicted_Gender'] = first_commit_dates['Predicted_Gender'].replace({0: "Male", 1: "Female", 2: "Unknown"})
49
- print(first_commit_dates)
50
  Gender_Percentage=plot.get_gender_percentage(first_commit_dates)
 
 
51
 
52
-
53
- # ******************************
54
- merged_df = df.merge(first_commit_dates[["Author","Predicted_Gender","Confidence"]], on=["Author"])
55
  # Group by Year and Predicted_Gender, then count the occurrences
56
  commit_per_gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
 
 
57
 
58
-
59
-
60
- fig=plot.get_commits_per_gender(commit_per_gender_counts)
61
  # Convert the chart to HTML and return it
62
- return Gender_Percentage,fig,first_commit_dates[["Author","First_Commit_Date","Predicted_Gender"]]
63
  def launch(self):
64
  self.demo.launch()
65
 
 
4
  import gradio as gr
5
  from get_gender import GenderPredictor
6
  from GitScraping import CommitInfo
7
+ from get_region import RegionPredictor
8
  import pandas as pd
9
  import utils.plot as plot
10
  class GenderPredictorApp:
 
21
  name = gr.inputs.Textbox(label="Git-url")
22
  pie_chart_output = gr.Plot(label="Authors by gender")
23
  histo_chart = gr.Plot(label="Known commits by gender")
24
+ region_commits = gr.Plot(label="Known commits by gender")
25
  data_output =gr.Dataframe(headers=None,label="Contributers Details")
26
  # name_buttom = gr.Button("Predict")
27
+ interface2_fn = gr.Interface(self.predict_github_url, inputs=name, outputs=[pie_chart_output, histo_chart,region_commits,data_output], title="GitGender: Exploring Global Gender Disparities in Public Code Contributions",cache_examples=True )
28
  demo = gr.TabbedInterface([interface1_fn, interface2_fn], ["Test Model", "Exploring Diversity in GitHub Repositories"])
29
  self.demo = demo
30
 
 
44
  return prediction
45
  def predict_github_url(self, url):
46
  commit_info = CommitInfo(url)
47
+ Region_predictor=RegionPredictor("saved_model/Regions")
48
  df,first_commit_dates = commit_info.get_first_commit_dates()
49
  first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(self.gender_predictor.predict_gender(name)))
50
  first_commit_dates['Predicted_Gender'] = first_commit_dates['Predicted_Gender'].replace({0: "Male", 1: "Female", 2: "Unknown"})
 
51
  Gender_Percentage=plot.get_gender_percentage(first_commit_dates)
52
+ Results=first_commit_dates[first_commit_dates["Predicted_Gender"]!="Unknown"]
53
+ Results=Region_predictor.get_region(Results)
54
 
55
+ merged_df = df.merge(Results[["Author","sub-region-prediction","Predicted_Gender","Confidence"]], on=["Author"])
 
 
56
  # Group by Year and Predicted_Gender, then count the occurrences
57
  commit_per_gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
58
+ commits_per_gender=plot.get_commits_per_gender(commit_per_gender_counts)
59
+ commits_per_region=plot.get_commits_per_gender(merged_df,url)
60
 
 
 
 
61
  # Convert the chart to HTML and return it
62
+ return Gender_Percentage,commits_per_gender,Results[["Author","First_Commit_Date","sub-region-prediction","Predicted_Gender"]],commits_per_region
63
  def launch(self):
64
  self.demo.launch()
65
 
get_region.py CHANGED
@@ -27,8 +27,8 @@ class RegionPredictor:
27
 
28
 
29
  def model_prediction(self, dataset,model,label_encoder,optF1=None,optROC=None):
30
- input_Full_name=np.asarray(dataset['Full_Name']).astype('str')
31
- input_offset=np.asarray(dataset['offset']).astype('float')
32
  predictions_proba = model.predict({
33
  "input_text": input_Full_name,
34
  "input_offset": input_offset
@@ -51,6 +51,7 @@ class RegionPredictor:
51
  return y_pred_F1,y_pred_ROC
52
 
53
  def get_region(self,dataset):
 
54
  model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/region/files/")
55
  y_pred,_=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
56
  dataset["region-prediction"]=y_pred
@@ -67,7 +68,7 @@ class RegionPredictor:
67
  y_pred,_=self.model_prediction(Asia,model,label_encoder,optF1,optROC)
68
  Asia["sub-region-prediction"]=y_pred
69
  model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/Americas/files/")
70
- y_pred=self.model_prediction(Americas,model,label_encoder,optF1,optROC)
71
  Americas["sub-region-prediction"]=y_pred
72
  Oceania["sub-region-prediction"]="Australia and New Zealand"
73
  Africa["sub-region-prediction"]="Africa"
 
27
 
28
 
29
  def model_prediction(self, dataset,model,label_encoder,optF1=None,optROC=None):
30
+ input_Full_name=np.asarray(dataset['Author']).astype('str')
31
+ input_offset=np.asarray(dataset['Author_Timezone']).astype('float')
32
  predictions_proba = model.predict({
33
  "input_text": input_Full_name,
34
  "input_offset": input_offset
 
51
  return y_pred_F1,y_pred_ROC
52
 
53
  def get_region(self,dataset):
54
+ dataset["Author_Timezone"]= dataset["Author_Timezone"] /60
55
  model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/region/files/")
56
  y_pred,_=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
57
  dataset["region-prediction"]=y_pred
 
68
  y_pred,_=self.model_prediction(Asia,model,label_encoder,optF1,optROC)
69
  Asia["sub-region-prediction"]=y_pred
70
  model,label_encoder,optF1,optROC=self.loadModel(self.models_directory+"/Americas/files/")
71
+ y_pred,_=self.model_prediction(Americas,model,label_encoder,optF1,optROC)
72
  Americas["sub-region-prediction"]=y_pred
73
  Oceania["sub-region-prediction"]="Australia and New Zealand"
74
  Africa["sub-region-prediction"]="Africa"