AliMustapha committed on
Commit
5a192b9
·
1 Parent(s): 4ceac99
Files changed (2) hide show
  1. GitScraping.py +23 -7
  2. plot.py +3 -3
GitScraping.py CHANGED
@@ -49,22 +49,38 @@ if __name__ == "__main__":
49
 
50
  gender_predictor = GenderPredictor(modelpath)
51
 
52
- repo_url = 'https://github.com/Amstf/DDoS-Attacks-Detection-Using-Adversarial-Neural-Network'
53
  commit_info = CommitInfo(repo_url)
54
 
55
 
56
  # Get and print the DataFrame with first commit dates for each author
57
  df,first_commit_dates = commit_info.get_first_commit_dates()
58
  first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(gender_predictor.predict_gender(name)))
 
 
59
  merged_df = df.merge(first_commit_dates[["Author","Predicted_Gender","Confidence"]], on=["Author"])
60
  # Group by Year and Predicted_Gender, then count the occurrences
 
 
 
61
  gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
62
 
63
- counts = first_commit_dates['Predicted_Gender'].value_counts()
64
- labels = ["Male", "Female", "Unknown"]
65
- colors = ["blue", "pink", "red"]
66
- fig = go.Figure(data=[go.Pie(labels=first_commit_dates['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
67
- # Convert the chart to HTML and return it
68
- chart_html = fig.to_html(full_html=False)
 
 
 
 
 
 
 
 
 
 
69
 
 
70
  print(first_commit_dates)
 
49
 
50
  gender_predictor = GenderPredictor(modelpath)
51
 
52
+ repo_url = 'https://gitlab.com/zacchiro/debian-firewoes'
53
  commit_info = CommitInfo(repo_url)
54
 
55
 
56
  # Get and print the DataFrame with first commit dates for each author
57
  df,first_commit_dates = commit_info.get_first_commit_dates()
58
  first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(gender_predictor.predict_gender(name)))
59
+ first_commit_dates['Predicted_Gender'] = first_commit_dates['Predicted_Gender'].replace({0: "Male", 1: "Female", 2: "Unknown"})
60
+
61
  merged_df = df.merge(first_commit_dates[["Author","Predicted_Gender","Confidence"]], on=["Author"])
62
  # Group by Year and Predicted_Gender, then count the occurrences
63
+
64
+
65
+
66
  gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
67
 
68
+ # counts = first_commit_dates['Predicted_Gender'].value_counts()
69
+ # labels = ["Male", "Female", "Unknown"]
70
+ # colors = ["blue", "pink", "red"]
71
+ # fig = go.Figure(data=[go.Pie(labels=first_commit_dates['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
72
+ # # Convert the chart to HTML and return it
73
+ # chart_html = fig.to_html(full_html=False)
74
+ gender_counts=gender_counts[gender_counts["Predicted_Gender"]!="Unknown"]
75
+
76
+ grouped = gender_counts.groupby('Year').agg({'Count': 'sum'})
77
+ grouped['Male Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Male'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
78
+ grouped['Female Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Female'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
79
+ grouped=grouped.fillna(0)
80
+ merged_gender_counts = grouped.merge(gender_counts, on=["Year"])[['Year', 'Male Percentage', 'Female Percentage',
81
+ 'Predicted_Gender', 'Count_y']].rename(columns={"Count_y":"Count"})
82
+
83
+ male_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Male"]
84
 
85
+ female_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Female"]
86
  print(first_commit_dates)
plot.py CHANGED
@@ -8,7 +8,8 @@ def get_commits_per_gender(gender_counts):
8
  grouped['Male Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Male'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
9
  grouped['Female Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Female'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
10
  grouped=grouped.fillna(0)
11
- merged_gender_counts = grouped.merge(gender_counts[["Year","Predicted_Gender"]], on=["Year"])
 
12
 
13
  male_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Male"]
14
  female_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Female"]
@@ -33,5 +34,4 @@ def get_gender_percentage(df):
33
  counts = df['Predicted_Gender'].value_counts()
34
  colors = ["blue", "pink", "gray"]
35
  Gender_Percentage_plot = go.Figure(data=[go.Pie(labels=df['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
36
- return Gender_Percentage_plot
37
-
 
8
  grouped['Male Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Male'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
9
  grouped['Female Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Female'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
10
  grouped=grouped.fillna(0)
11
+ merged_gender_counts = grouped.merge(gender_counts, on=["Year"])[['Year', 'Male Percentage', 'Female Percentage',
12
+ 'Predicted_Gender', 'Count_y']].rename(columns={"Count_y":"Count"})
13
 
14
  male_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Male"]
15
  female_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Female"]
 
34
  counts = df['Predicted_Gender'].value_counts()
35
  colors = ["blue", "pink", "gray"]
36
  Gender_Percentage_plot = go.Figure(data=[go.Pie(labels=df['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
37
+ return Gender_Percentage_plot