Spaces:
Runtime error
Runtime error
| from typing import Any, List | |
| import gradio as gr | |
| from toolz import concat | |
| import httpx | |
| import plotly.express as px | |
| import polars as pl | |
| from pathlib import Path | |
| from datasets import load_dataset | |
| from cachetools import TTLCache, cached | |
| from datetime import datetime, timedelta | |
| from datasets import Dataset | |
| import os | |
# Token passed to `push_to_hub` when the stats dataset is updated. Indexing
# `os.environ` directly raises KeyError at import time if the variable is unset.
token = os.environ["HUGGINGFACE_TOKEN"]

# Avatar image URL for librarian-bot (not referenced in the visible part of
# this file).
librarian_bot_avatar = (
    "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/"
    "uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
)
@cached(cache=TTLCache(maxsize=32, ttl=15 * 60))
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch a user's recent discussion activity from the Hugging Face Hub.

    Pages through ``/api/recent-activity`` 100 records at a time, up to 2000
    records, and returns the raw ``recentActivity`` items in the order the
    API returned them.

    Results are cached per ``user`` for 15 minutes because the dashboard
    calls this function several times while it is being built.

    Args:
        user: Hub username whose discussion activity is requested.

    Returns:
        A flat list of activity records (JSON-decoded objects).

    Raises:
        httpx.HTTPStatusError: If the Hub answers with a 4xx/5xx status.
        KeyError: If a response has no ``recentActivity`` field.
    """
    page_size = 100
    max_records = 2000
    all_data: List[Any] = []
    # Start at skip=0: starting at 1 silently dropped the first record.
    for skip in range(0, max_records, page_size):
        r = httpx.get(
            "https://huggingface.co/api/recent-activity",
            # Passing params lets httpx URL-encode the user name.
            params={
                "limit": page_size,
                "type": "discussion",
                "skip": skip,
                "user": user,
            },
        )
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # Presumably an empty page means the feed is exhausted, so the
            # remaining requests would be wasted -- TODO confirm against the API.
            break
        all_data.extend(activity)
    return all_data
def parse_date_time(date_time: str) -> datetime:
    """Parse a ``...Z`` ISO-8601 timestamp string into a naive ``datetime``.

    Accepts timestamps with fractional seconds
    (``2023-08-01T12:34:56.789Z``) and, as a fallback, whole-second
    timestamps (``2023-08-01T12:34:56Z``). The trailing ``Z`` is matched
    literally, so the result carries no ``tzinfo``.

    Args:
        date_time: Timestamp string in one of the two formats above.

    Returns:
        The parsed naive ``datetime``.

    Raises:
        ValueError: If the string matches neither format.
    """
    try:
        return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError:
        # `%f` requires at least one digit, so whole-second timestamps need
        # their own format instead of failing outright.
        return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%SZ")
def parse_pr_data(data):
    """Flatten one activity record into the row layout used by the stats tables.

    Reads the nested ``discussionData`` object of ``data`` and returns a flat
    dict with the keys ``createdAt`` (parsed with ``parse_date_time``),
    ``pr_number``, ``status``, ``repo_id``, ``type`` and ``isPullRequest``.

    Raises:
        KeyError: If any of the fields read below is missing from the record.
    """
    discussion = data["discussionData"]
    return {
        "createdAt": parse_date_time(discussion["createdAt"]),
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
        "type": discussion["repo"]["type"],
        "isPullRequest": discussion["isPullRequest"],
    }
def update_data():
    """Merge fresh librarian-bot activity into the stored stats and republish.

    Loads the ``train`` split of the ``librarian-bot/stats`` dataset, fetches
    the bot's current discussion activity, merges the two so that each
    discussion appears once with its most recently fetched values, pushes the
    merged table back to ``librarian-bot/stats`` and returns it.

    Returns:
        A polars DataFrame with one row per (repo_id, type, pr_number).
        If nothing could be fetched, the stored table is returned unchanged
        and nothing is pushed.
    """
    previous_df = pl.DataFrame(
        load_dataset("librarian-bot/stats", split="train").data.table
    )
    records = [
        parse_pr_data(item) for item in get_hub_community_activity("librarian-bot")
    ]
    if not records:
        # An empty record list yields a column-less frame that cannot be
        # concatenated with the stored table; keep the stored data as-is.
        return previous_df
    update_df = pl.DataFrame(records)
    # De-duplicate on the discussion's identity rather than on whole rows:
    # a PR whose status changed (e.g. open -> merged) would otherwise be kept
    # twice, once per status. Fresh rows come last, so keep="last" prefers them.
    df = pl.concat([previous_df, update_df]).unique(
        subset=["repo_id", "type", "pr_number"],
        keep="last",
        maintain_order=True,
    )
    Dataset(df.to_arrow()).push_to_hub("librarian-bot/stats", token=token)
    return df
# NOTE: unused legacy helper, kept commented out for reference only.
# def get_pr_status():
#     df = update_data()
#     df = df.filter(pl.col("isPullRequest"))
#     return df.select(pl.col("status").value_counts())
#     # return frequencies(x["status"] for x in pr_data)
def create_pie():
    """Build a pie chart of pull-request statuses and wrap it in a ``gr.Plot``.

    Calls ``update_data()`` (which also republishes the stats dataset), keeps
    only the rows flagged as pull requests, counts them per ``status`` and
    renders the counts as a pie chart.

    Returns:
        A ``gr.Plot`` component holding the pie figure.
    """
    df = update_data()
    # Filter with the boolean column expression itself. The previous
    # `pl.col(...) is True` was a Python identity test that always evaluates
    # to the plain bool False, so the column was never consulted.
    df = df.filter(pl.col("isPullRequest"))
    counts = df["status"].value_counts().to_pandas()
    # The count column's name differs between polars releases
    # ("counts" vs "count"), so take whichever column is not the label.
    count_column = next(name for name in counts.columns if name != "status")
    fig = px.pie(counts, values=count_column, names="status", template="seaborn")
    return gr.Plot(fig)
def group_status_by_pr_number():
    """Return the mean ``pr_number`` per discussion status as a pandas DataFrame.

    Fetches librarian-bot's current activity, flattens it with
    ``parse_pr_data`` and averages ``pr_number`` within each ``status`` group.

    Returns:
        A pandas DataFrame with a ``status`` column and the mean ``pr_number``
        for that status.
    """
    records = [
        parse_pr_data(item) for item in get_hub_community_activity("librarian-bot")
    ]
    frame = pl.DataFrame(records)
    # `DataFrame.groupby` was renamed to `group_by` in newer polars releases;
    # use whichever the installed version provides.
    group_by = frame.group_by if hasattr(frame, "group_by") else frame.groupby
    return group_by("status").agg(pl.mean("pr_number")).to_pandas()
def plot_over_time():
    """Plot the cumulative number of discussions per status over time.

    Fetches librarian-bot's current activity, counts discussions per creation
    date and status, accumulates the daily counts and draws one line per
    status.

    Returns:
        A plotly figure with the creation date on the x axis and one
        cumulative-count line per status column.
    """
    records = [
        parse_pr_data(item) for item in get_hub_community_activity("librarian-bot")
    ]
    df = pl.DataFrame(records).with_columns(pl.col("createdAt").cast(pl.Date))
    # One row per day, one column per status, cell = number of discussions.
    df = df.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    # Days on which a status did not occur come out of the pivot as null.
    # No row-total column is added: it was never plotted, and summing the
    # hard-coded "open"/"closed"/"merged" columns failed whenever one of
    # those statuses was absent from the data.
    df = df.fill_null(0).sort("createdAt")
    cumulative = df.to_pandas().set_index("createdAt").cumsum()
    return px.line(cumulative, x=cumulative.index, y=list(cumulative.columns))
# Dashboard layout: an HTML description followed by three sections (status
# pie chart, cumulative timeline, per-status table). Each section's data is
# computed once, while the layout is being built.
with gr.Blocks() as demo:
    # frequencies = get_pr_status("librarian-bot")
    # Decode explicitly as UTF-8 so the result does not depend on the host's
    # locale default encoding.
    gr.HTML(Path("description.html").read_text(encoding="utf-8"))
    # gr.Markdown(f"Total PRs opened: {sum(frequencies.values())}")
    with gr.Column():
        gr.Markdown("## Pull requests Status")
        gr.Markdown(
            "The below pie chart shows the percentage of pull requests made by"
            " librarian bot that are open, closed or merged"
        )
        # Called for its side effect: the gr.Plot it constructs is created
        # inside this Column.
        create_pie()
    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        gr.Plot(plot_over_time())
    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        gr.DataFrame(group_status_by_pr_number())

demo.launch(debug=True)