Spaces:
Runtime error
Runtime error
| from typing import Any, List | |
| import gradio as gr | |
| from toolz import concat, frequencies | |
| import httpx | |
| from functools import lru_cache | |
| import pandas as pd | |
| import plotly.express as px | |
| import polars as pl | |
| from pathlib import Path | |
| from datetime import datetime | |
# Avatar image URL for the librarian-bot account; the cloudimg query string
# requests a 200x200 crop (w=200, h=200, f=face).
librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
@lru_cache(maxsize=32)
def _fetch_hub_community_activity(user: str) -> tuple:
    """Download the discussion activity feed for ``user`` (cached per user).

    The result is a tuple so the cached sequence itself cannot be mutated
    by callers. The cache lives for the lifetime of the process, so data
    is only as fresh as the first call for a given user.
    """
    page_size = 100
    max_items = 2000
    collected = []
    # One client so all pages reuse the same connection.
    with httpx.Client() as client:
        # Start at skip=0: starting at 1 silently dropped the first item.
        for skip in range(0, max_items, page_size):
            response = client.get(
                "https://huggingface.co/api/recent-activity",
                # Passing params lets httpx URL-encode the user name.
                params={
                    "limit": page_size,
                    "type": "discussion",
                    "skip": skip,
                    "user": user,
                },
            )
            # Fail with a clear HTTP error instead of a KeyError on the body.
            response.raise_for_status()
            page = response.json()["recentActivity"]
            if not page:
                # Feed exhausted; no point requesting further pages.
                break
            collected.extend(page)
    return tuple(collected)


def get_hub_community_activity(user: str) -> List[Any]:
    """Return the recent discussion activity entries for a Hub user.

    Pages through ``https://huggingface.co/api/recent-activity`` 100 items
    at a time, up to 2000 items, stopping early once a page comes back
    empty. Results are cached per user for the lifetime of the process, so
    repeated calls do not hit the network again.

    Args:
        user: Hub user name whose activity should be fetched.

    Returns:
        A new list of the ``recentActivity`` entries from every page, in
        the order the API returned them. The entries themselves are shared
        with the cache and should be treated as read-only.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    return list(_fetch_hub_community_activity(user))
def get_pr_status(user: str):
    """Count the pull requests in ``user``'s discussion activity by status.

    Only activity entries whose ``discussionData`` is flagged as a pull
    request are counted.

    Args:
        user: Hub user name whose activity should be inspected.

    Returns:
        A mapping of status value to the number of pull requests with
        that status.
    """
    statuses = []
    for entry in get_hub_community_activity(user):
        discussion = entry["discussionData"]
        if discussion["isPullRequest"]:
            statuses.append(discussion["status"])
    return frequencies(statuses)
def create_pie():
    """Build a Gradio plot with a pie chart of librarian-bot PR statuses.

    Returns:
        A ``gr.Plot`` component wrapping a Plotly pie chart whose slices
        are the PR statuses, sized by the number of PRs in each status.
    """
    status_counts = get_pr_status("librarian-bot")
    table = pd.DataFrame(
        {
            "status": status_counts.keys(),
            "number": status_counts.values(),
        }
    )
    chart = px.pie(table, values="number", names="status", template="seaborn")
    return gr.Plot(chart)
def parse_date_time(date_time: str) -> datetime:
    """Parse a timestamp such as ``2023-06-01T12:34:56.789Z``.

    Args:
        date_time: Timestamp string with fractional seconds and a
            trailing ``Z``.

    Returns:
        A naive ``datetime``; the trailing ``Z`` is matched as a literal
        character, so no timezone is attached.

    Raises:
        ValueError: If ``date_time`` does not match the expected format.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    return datetime.strptime(date_time, timestamp_format)
def parse_pr_data(data):
    """Flatten one activity entry into the fields used by the charts.

    Args:
        data: An activity entry containing a ``discussionData`` mapping
            with ``createdAt``, ``num``, ``status`` and ``repo.name``.

    Returns:
        A dict with keys ``createdAt`` (parsed ``datetime``),
        ``pr_number``, ``status`` and ``repo_id``.
    """
    discussion = data["discussionData"]
    return {
        "createdAt": parse_date_time(discussion["createdAt"]),
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
    }
def group_status_by_pr_number():
    """Return the mean PR number per status for librarian-bot's pull requests.

    Only activity entries flagged as pull requests are considered, matching
    what ``get_pr_status`` counts.

    Returns:
        A pandas DataFrame with one row per status and the columns
        ``status`` and ``pr_number`` (the mean PR number for that status).
        An empty frame with those columns is returned when there are no
        pull requests.
    """
    activity = get_hub_community_activity("librarian-bot")
    rows = [
        parse_pr_data(entry)
        for entry in activity
        if entry["discussionData"]["isPullRequest"]
    ]
    if not rows:
        # Grouping an empty frame would fail on the missing "status" column.
        return pd.DataFrame(columns=["status", "pr_number"])
    frame = pl.DataFrame(rows)
    # polars renamed ``groupby`` to ``group_by``; use whichever this
    # installed version provides.
    group_by = getattr(frame, "group_by", None) or frame.groupby
    return group_by("status").agg(pl.mean("pr_number")).to_pandas()
def plot_over_time():
    """Plot the cumulative number of librarian-bot PRs per status over time.

    Only activity entries flagged as pull requests are considered, matching
    what ``get_pr_status`` counts.

    Returns:
        A Plotly line figure with the creation date on the x axis and one
        line per status showing the running total of PRs in that status.
    """
    activity = get_hub_community_activity("librarian-bot")
    rows = [
        parse_pr_data(entry)
        for entry in activity
        if entry["discussionData"]["isPullRequest"]
    ]
    df = pl.DataFrame(rows).with_columns(pl.col("createdAt").cast(pl.Date))
    # One row per creation date, one column per status, cells = PR count.
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # A per-row "sum" column used to be added here but was never plotted,
    # and its hard-coded status names broke when a status was absent from
    # the data, so it is no longer computed.
    df = df.fill_null(0).sort("createdAt")
    # Running totals per status, indexed by date.
    df = df.to_pandas().set_index("createdAt").cumsum()
    return px.line(df, x=df.index, y=list(df.columns))
# Build the dashboard UI. All data is fetched once, while the layout is built.
with gr.Blocks() as demo:
    # NOTE: do not name this variable `frequencies`. A `with` block does not
    # create a new scope, so that would rebind the module-level
    # `toolz.frequencies` import to a dict and make every later
    # `get_pr_status()` call (e.g. from `create_pie()`) fail with
    # "TypeError: 'dict' object is not callable".
    pr_status_counts = get_pr_status("librarian-bot")
    gr.HTML(Path("description.html").read_text(encoding="utf-8"))
    gr.Markdown(f"Total PRs opened: {sum(pr_status_counts.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
        )
        # create_pie() instantiates its gr.Plot inside this column.
        create_pie()
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())
    with gr.Column():
        gr.Plot(plot_over_time())
demo.launch(debug=True)