Spaces:

lschlessinger
/

usatt-rating-analyzer

Running

App Files Files Community

lschlessinger committed on Dec 7, 2022

Commit

a02aab5

1 Parent(s): 795aaee

Upload 2 files

Browse files

Files changed (2) hide show

app.py +226 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,226 @@

+from typing import Optional, Tuple
+import gradio as gr
+import pandas as pd
+from pathlib import Path
+import seaborn as sns
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+import numpy as np
def _rename_columns(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
    """Map the headers of a match-history export to snake_case column names.

    Args:
        df: Frame whose columns use the export's original headers.
        is_tournament: When true, the tournament-specific headers are mapped;
            otherwise the league-specific ones are.

    Returns:
        The result of ``DataFrame.rename``; headers that do not appear in the
        mapping are left untouched.
    """
    shared_headers = {
        "Rating": "rating",
        "Result": "result",
        "Scores": "scores",
        "Opponent": "opponent",
        "OpponentRating": "opponent_rating",
    }
    if is_tournament:
        specific_headers = {
            "TournamentStartDate": "tournament_start_date",
            "TournamentEndDate": "tournament_end_date",
            # Kept exactly as written (leading space and spelling); presumably
            # this mirrors the header found in the exported file - confirm.
            " Touranament": "tournament",
        }
    else:
        specific_headers = {
            "EventDate": "event_date",
            "LeagueName": "league_name",
        }
    return df.rename(columns={**shared_headers, **specific_headers})
def _fix_dtypes(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
    """Convert the renamed match columns to their working dtypes.

    The frame is modified in place and the same object is returned.

    Args:
        df: Frame that already uses the snake_case column names.
        is_tournament: Selects which competition-specific columns are converted.

    Returns:
        ``df`` itself, with date columns parsed by ``pd.to_datetime`` and the
        remaining columns cast with ``astype``.
    """
    if is_tournament:
        for date_column in ("tournament_start_date", "tournament_end_date"):
            df[date_column] = pd.to_datetime(df[date_column])
        casts = {"tournament": "category"}
    else:
        df["event_date"] = pd.to_datetime(df["event_date"])
        casts = {"league_name": "string"}

    # Columns common to both export kinds, converted in this order.
    casts.update(
        rating="int",
        result="category",
        scores="string",
        opponent="category",
        opponent_rating="int",
    )
    for column, dtype in casts.items():
        df[column] = df[column].astype(dtype)
    return df
def _check_match_type(match_type: str) -> str:
    """Validate a match type and hand it back unchanged.

    Args:
        match_type: Candidate match type.

    Returns:
        ``match_type`` when it is one of the supported values.

    Raises:
        ValueError: If ``match_type`` is neither "tournament" nor "league".
    """
    supported = {"tournament", "league"}
    if match_type in supported:
        return match_type
    raise ValueError(
        f"The only supported match types are {supported}. Found match type of '{match_type}'.")
def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
    """Count the distinct competitions present in the match frame.

    Tournaments are told apart by the ``tournament`` column; league play is
    told apart by ``event_date``.

    Args:
        df: Match frame with snake_case columns.
        is_tournament: Whether ``df`` holds tournament matches.

    Returns:
        Number of unique values in the selected column.
    """
    if is_tournament:
        competition_ids = df["tournament"]
    else:
        competition_ids = df["event_date"]
    return competition_ids.nunique()
def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
    """Plot a histogram of the number of matches played in each competition.

    Args:
        df: Match frame with snake_case columns.
        is_tournament: Group by ``tournament`` when true, by ``event_date``
            otherwise.

    Returns:
        The Matplotlib figure holding the histogram.
    """
    fig, ax = plt.subplots()
    ax.set_title('Matches per competition')
    group_key = 'tournament' if is_tournament else "event_date"
    matches_per_competition = df.groupby(group_key).size()
    sns.histplot(matches_per_competition, ax=ax)
    ax.set_xlabel('Number of matches in competition')
    return fig
def get_competition_name_word_cloud_fig(df: pd.DataFrame, is_tournament: bool):
    """Render a word cloud built from the competition names of every match.

    Args:
        df: Match frame with snake_case columns.
        is_tournament: Use the ``tournament`` column when true, the
            ``league_name`` column otherwise.

    Returns:
        The Matplotlib figure showing the word cloud with its axes hidden.
    """
    fig, ax = plt.subplots()
    name_column = "tournament" if is_tournament else "league_name"
    # One entry per match, so frequently played competitions weigh more.
    names = df[name_column].values.tolist()
    cloud = WordCloud().generate(" ".join(names))
    ax.imshow(cloud, interpolation='bilinear')
    ax.axis("off")
    return fig
def get_opponent_name_word_cloud_fig(df: pd.DataFrame):
    """Render a word cloud built from the opponent name of every match.

    Args:
        df: Match frame with an ``opponent`` column.

    Returns:
        The Matplotlib figure showing the word cloud with its axes hidden.
    """
    fig, ax = plt.subplots()
    opponent_names = df["opponent"].values.tolist()
    cloud = WordCloud().generate(" ".join(opponent_names))
    ax.imshow(cloud, interpolation='bilinear')
    ax.axis("off")
    return fig
def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool):
    """Plot the ``rating`` column against the competition date.

    Args:
        df: Match frame with snake_case columns.
        is_tournament: Use ``tournament_end_date`` for the x axis when true,
            ``event_date`` otherwise.

    Returns:
        The Matplotlib figure holding the line plot.
    """
    fig, ax = plt.subplots()
    ax.set_title('Rating over time')
    date_column = "tournament_end_date" if is_tournament else "event_date"
    sns.lineplot(
        data=df,
        x=date_column,
        y="rating",
        marker='.',
        markersize=10,
        ax=ax,
    )
    ax.set_xlabel('Competition date')
    ax.set_ylabel('Rating')
    return fig
def get_max_int(int_csv_str: str) -> int:
    """Get the max int from an int CSV.

    Args:
        int_csv_str: Comma-separated integers, e.g. ``"11, 9,7"``. Whitespace
            around each value is ignored.

    Returns:
        The largest integer found in the string.

    Raises:
        ValueError: If any comma-separated piece is not an integer (this
            includes an empty string).
    """
    return max(int(piece.strip()) for piece in int_csv_str.split(','))


def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
    """Return the match whose ``scores`` string contains the largest value.

    Args:
        df: Match frame with a ``scores`` column of comma-separated integers.
        is_tournament: Only tournament data is analysed.

    Returns:
        ``None`` for league data. Otherwise a one-row frame holding the first
        match with the highest single score value, or an empty frame when
        ``df`` has no rows.
    """
    if not is_tournament:
        return None
    if df.empty:
        # Nothing to rank; hand back an empty frame with the same columns.
        return df.iloc[0:0]
    # NOTE(review): if the export marks lost games with negative numbers, the
    # plain maximum ignores them - confirm against the export format.
    game_lengths = df["scores"].apply(get_max_int).to_numpy()
    # np.argmax yields a position, not an index label, so select with iloc;
    # this stays correct when the frame's index is not the default 0..n-1.
    row_position = int(np.argmax(game_lengths))
    return df.iloc[[row_position]]
def get_opponent_rating_distr_fig(df: pd.DataFrame):
    """Plot a histogram of opponent ratings, coloured by match result.

    Args:
        df: Match frame with ``opponent_rating`` and ``result`` columns.

    Returns:
        The Matplotlib figure holding the histogram.
    """
    fig, ax = plt.subplots()
    ax.set_title('Opponent rating distribution')
    sns.histplot(data=df, x="opponent_rating", hue='result', ax=ax)
    ax.set_xlabel('Opponent rating')
    return fig
def get_opponent_rating_dist_over_time_fig(df: pd.DataFrame, is_tournament: bool):
    """Plot per-year split violins of opponent ratings, split by match result.

    Args:
        df: Match frame with ``opponent_rating``, ``result`` and a datetime
            competition-date column.
        is_tournament: Take the year from ``tournament_end_date`` when true,
            from ``event_date`` otherwise.

    Returns:
        The 12x8 inch Matplotlib figure holding the violin plot.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title('Opponent rating distribution over time')
    date_column = "tournament_end_date" if is_tournament else "event_date"
    competition_year = df[date_column].dt.year
    sns.violinplot(
        data=df,
        x=competition_year,
        y="opponent_rating",
        hue="result",
        split=True,
        inner='points',
        cut=1,
        ax=ax,
    )
    ax.set_xlabel('Competition year')
    ax.set_ylabel('Opponent rating')
    return fig
def load_match_df(file_path: Path) -> Tuple[pd.DataFrame, bool]:
    """Read a match-history CSV and normalise its columns and dtypes.

    The match type is taken from the part of the file name that precedes the
    first underscore.

    Args:
        file_path: Location of the CSV file.

    Returns:
        A ``(frame, is_tournament)`` pair: the cleaned match frame and whether
        the file holds tournament (as opposed to league) matches.

    Raises:
        ValueError: If the file-name prefix is not a supported match type.
    """
    name_prefix, _, _ = file_path.name.partition('_')
    is_tournament = _check_match_type(name_prefix) == "tournament"
    raw_matches = pd.read_csv(file_path)
    renamed_matches = _rename_columns(raw_matches, is_tournament)
    matches = _fix_dtypes(renamed_matches, is_tournament)
    return matches, is_tournament
def usatt_rating_analyzer(file_obj):
    """Run every analysis on an uploaded results file.

    Args:
        file_obj: Uploaded file object; only its ``name`` attribute (the path
            of the file on disk) is used.

    Returns:
        A 9-tuple: competition count, match count, matches-per-competition
        figure, opponent-name word cloud, competition-name word cloud,
        rating-over-time figure, longest-game match frame (``None`` for league
        data), opponent-rating histogram, and opponent-rating-by-year figure.
    """
    df, is_tournament = load_match_df(Path(file_obj.name))

    # The position of each element must line up with the ``outputs`` list
    # that is wired to the "Analyze" button.
    return (
        get_num_competitions_played(df, is_tournament),
        len(df),
        get_matches_per_competition_fig(df, is_tournament),
        get_opponent_name_word_cloud_fig(df),
        get_competition_name_word_cloud_fig(df, is_tournament),
        get_rating_over_time_fig(df, is_tournament),
        get_match_with_longest_game(df, is_tournament),
        get_opponent_rating_distr_fig(df),
        get_opponent_rating_dist_over_time_fig(df, is_tournament),
    )
# Gradio UI: an upload form on top, followed by the analysis results.
with gr.Blocks() as demo:
    gr.Markdown("""# USATT rating analyzer
    Analyze USA table tennis tournament and league results.
    ## Downloading match results
    1. Make sure you are [logged in](https://usatt.simplycompete.com/login/auth).
    2. Find the *active* player you wish to analyze (e.g.,  [Kanak Jha](https://usatt.simplycompete.com/userAccount/up/3431)).
    3. Under 'Tournaments' or 'Leagues', click *Download Tournament/League Match History*.
    """)
    # Input area: the results file and the button that starts the analysis.
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label='USATT Results File', file_types=['file'])
            btn = gr.Button("Analyze")
    # Output area: two summary counters, then the plots and the longest-game table.
    with gr.Group():
        with gr.Row():
            with gr.Column():
                num_comps_box = gr.Textbox(lines=1, label="Number of competitions (tournaments/leagues) played")
            with gr.Column():
                num_matches_box = gr.Textbox(lines=1, label="Number of matches played")
        rating_over_time_plot = gr.Plot(show_label=False)
        matches_per_comp_plot = gr.Plot(show_label=False)
        with gr.Row():
            with gr.Column():
                opponent_names_plot = gr.Plot(label="Opponent names")
            with gr.Column():
                comp_names_plot = gr.Plot(label="Competition names")
        match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
        opponent_rating_dist_plot = gr.Plot(show_label=False)
        opponent_rating_dist_over_time_plot = gr.Plot(show_label=False)
    inputs = [input_file]
    # Order matches the tuple returned by usatt_rating_analyzer, not the
    # on-screen layout above.
    outputs = [
        num_comps_box,
        num_matches_box,
        matches_per_comp_plot,
        opponent_names_plot,
        comp_names_plot,
        rating_over_time_plot,
        match_longest_game_gdf,
        opponent_rating_dist_plot,
        opponent_rating_dist_over_time_plot,
    ]
    btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)

# Start the server only when executed as a script, so the module can be
# imported (e.g. to reuse or test the helpers) without launching the app.
if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+pandas
+seaborn
+matplotlib
+wordcloud
+numpy