Spaces:
Runtime error
Runtime error
Commit ·
5a192b9
1
Parent(s): 4ceac99
fixed
Browse files- GitScraping.py +23 -7
- plot.py +3 -3
GitScraping.py
CHANGED
|
@@ -49,22 +49,38 @@ if __name__ == "__main__":
|
|
| 49 |
|
| 50 |
gender_predictor = GenderPredictor(modelpath)
|
| 51 |
|
| 52 |
-
repo_url = 'https://
|
| 53 |
commit_info = CommitInfo(repo_url)
|
| 54 |
|
| 55 |
|
| 56 |
# Get and print the DataFrame with first commit dates for each author
|
| 57 |
df,first_commit_dates = commit_info.get_first_commit_dates()
|
| 58 |
first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(gender_predictor.predict_gender(name)))
|
|
|
|
|
|
|
| 59 |
merged_df = df.merge(first_commit_dates[["Author","Predicted_Gender","Confidence"]], on=["Author"])
|
| 60 |
# Group by Year and Predicted_Gender, then count the occurrences
|
|
|
|
|
|
|
|
|
|
| 61 |
gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
|
| 62 |
|
| 63 |
-
counts = first_commit_dates['Predicted_Gender'].value_counts()
|
| 64 |
-
labels = ["Male", "Female", "Unknown"]
|
| 65 |
-
colors = ["blue", "pink", "red"]
|
| 66 |
-
fig = go.Figure(data=[go.Pie(labels=first_commit_dates['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
|
| 67 |
-
# Convert the chart to HTML and return it
|
| 68 |
-
chart_html = fig.to_html(full_html=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
|
|
|
| 70 |
print(first_commit_dates)
|
|
|
|
| 49 |
|
| 50 |
gender_predictor = GenderPredictor(modelpath)
|
| 51 |
|
| 52 |
+
repo_url = 'https://gitlab.com/zacchiro/debian-firewoes'
|
| 53 |
commit_info = CommitInfo(repo_url)
|
| 54 |
|
| 55 |
|
| 56 |
# Get and print the DataFrame with first commit dates for each author
|
| 57 |
df,first_commit_dates = commit_info.get_first_commit_dates()
|
| 58 |
first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(lambda name: pd.Series(gender_predictor.predict_gender(name)))
|
| 59 |
+
first_commit_dates['Predicted_Gender'] = first_commit_dates['Predicted_Gender'].replace({0: "Male", 1: "Female", 2: "Unknown"})
|
| 60 |
+
|
| 61 |
merged_df = df.merge(first_commit_dates[["Author","Predicted_Gender","Confidence"]], on=["Author"])
|
| 62 |
# Group by Year and Predicted_Gender, then count the occurrences
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')
|
| 67 |
|
| 68 |
+
# counts = first_commit_dates['Predicted_Gender'].value_counts()
|
| 69 |
+
# labels = ["Male", "Female", "Unknown"]
|
| 70 |
+
# colors = ["blue", "pink", "red"]
|
| 71 |
+
# fig = go.Figure(data=[go.Pie(labels=first_commit_dates['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
|
| 72 |
+
# # Convert the chart to HTML and return it
|
| 73 |
+
# chart_html = fig.to_html(full_html=False)
|
| 74 |
+
gender_counts=gender_counts[gender_counts["Predicted_Gender"]!="Unknown"]
|
| 75 |
+
|
| 76 |
+
grouped = gender_counts.groupby('Year').agg({'Count': 'sum'})
|
| 77 |
+
grouped['Male Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Male'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
|
| 78 |
+
grouped['Female Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Female'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
|
| 79 |
+
grouped=grouped.fillna(0)
|
| 80 |
+
merged_gender_counts = grouped.merge(gender_counts, on=["Year"])[['Year', 'Male Percentage', 'Female Percentage',
|
| 81 |
+
'Predicted_Gender', 'Count_y']].rename(columns={"Count_y":"Count"})
|
| 82 |
+
|
| 83 |
+
male_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Male"]
|
| 84 |
|
| 85 |
+
female_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Female"]
|
| 86 |
print(first_commit_dates)
|
plot.py
CHANGED
|
@@ -8,7 +8,8 @@ def get_commits_per_gender(gender_counts):
|
|
| 8 |
grouped['Male Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Male'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
|
| 9 |
grouped['Female Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Female'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
|
| 10 |
grouped=grouped.fillna(0)
|
| 11 |
-
merged_gender_counts = grouped.merge(gender_counts
|
|
|
|
| 12 |
|
| 13 |
male_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Male"]
|
| 14 |
female_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Female"]
|
|
@@ -33,5 +34,4 @@ def get_gender_percentage(df):
|
|
| 33 |
counts = df['Predicted_Gender'].value_counts()
|
| 34 |
colors = ["blue", "pink", "gray"]
|
| 35 |
Gender_Percentage_plot = go.Figure(data=[go.Pie(labels=df['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
|
| 36 |
-
return Gender_Percentage_plot
|
| 37 |
-
|
|
|
|
| 8 |
grouped['Male Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Male'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
|
| 9 |
grouped['Female Percentage'] = gender_counts[gender_counts['Predicted_Gender'] == 'Female'].groupby('Year')['Count'].sum() / grouped['Count'] * 100
|
| 10 |
grouped=grouped.fillna(0)
|
| 11 |
+
merged_gender_counts = grouped.merge(gender_counts, on=["Year"])[['Year', 'Male Percentage', 'Female Percentage',
|
| 12 |
+
'Predicted_Gender', 'Count_y']].rename(columns={"Count_y":"Count"})
|
| 13 |
|
| 14 |
male_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Male"]
|
| 15 |
female_count=merged_gender_counts[merged_gender_counts["Predicted_Gender"]=="Female"]
|
|
|
|
| 34 |
counts = df['Predicted_Gender'].value_counts()
|
| 35 |
colors = ["blue", "pink", "gray"]
|
| 36 |
Gender_Percentage_plot = go.Figure(data=[go.Pie(labels=df['Predicted_Gender'].unique(), values=counts, marker=dict(colors=colors))])
|
| 37 |
+
return Gender_Percentage_plot
|
|
|